"""
Base validation utilities for all exchanges.

This module provides common validation patterns and base classes
that can be extended by exchange-specific validators.
"""

import re
from datetime import datetime, timezone, timedelta
from decimal import Decimal, InvalidOperation
from typing import Dict, List, Optional, Any, Union, Pattern
from abc import ABC, abstractmethod

from .data_types import DataValidationResult, StandardizedTrade, TradeSide


class ValidationResult:
    """Simple validation result for individual field validation.

    Attributes:
        is_valid: True when the validated value passed all hard checks.
        errors: Messages describing hard failures (empty list when none given).
        warnings: Messages describing soft issues (empty list when none given).
        sanitized_data: Optional normalized form of the validated value.
    """

    def __init__(self,
                 is_valid: bool,
                 errors: Optional[List[str]] = None,
                 warnings: Optional[List[str]] = None,
                 sanitized_data: Any = None):
        self.is_valid = is_valid
        # ``None`` (or an empty list) is replaced by a fresh list so that
        # instances never share a default container.
        self.errors = errors or []
        self.warnings = warnings or []
        self.sanitized_data = sanitized_data

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(is_valid={self.is_valid!r}, "
            f"errors={self.errors!r}, warnings={self.warnings!r}, "
            f"sanitized_data={self.sanitized_data!r})"
        )


class BaseDataValidator(ABC):
    """
    Abstract base class for exchange data validators.

    This class provides common validation patterns and utilities
    that can be reused across different exchange implementations.
    Subclasses must implement ``validate_symbol_format`` and
    ``validate_websocket_message``.
    """

    def __init__(self,
                 exchange_name: str,
                 component_name: str = "base_data_validator",
                 logger=None):
        """
        Initialize base data validator.

        Args:
            exchange_name: Name of the exchange (e.g., 'okx', 'binance')
            component_name: Name for logging
            logger: Logger instance. If None, no logging will be performed.
        """
        self.exchange_name = exchange_name
        self.component_name = component_name
        self.logger = logger

        # Common validation patterns
        self._numeric_pattern = re.compile(r'^-?\d*\.?\d+$')
        self._trade_id_pattern = re.compile(r'^[a-zA-Z0-9_-]+$')  # Flexible pattern

        # Valid trade sides
        self._valid_trade_sides = {'buy', 'sell'}

        # Common price and size limits (can be overridden by subclasses)
        self._min_price = Decimal('0.00000001')  # 1 satoshi equivalent
        self._max_price = Decimal('10000000')  # 10 million
        self._min_size = Decimal('0.00000001')  # Minimum trade size
        self._max_size = Decimal('1000000000')  # 1 billion max size

        # Timestamp validation (milliseconds since epoch)
        self._min_timestamp = 1000000000000  # 2001-09-09 (reasonable minimum)
        self._max_timestamp = 9999999999999  # 2286-11-20 (reasonable maximum)

        if self.logger:
            self.logger.debug(f"{self.component_name}: Initialized {exchange_name} data validator")

    # Abstract methods that must be implemented by subclasses

    @abstractmethod
    def validate_symbol_format(self, symbol: str) -> ValidationResult:
        """Validate exchange-specific symbol format."""
        pass

    @abstractmethod
    def validate_websocket_message(self, message: Dict[str, Any]) -> DataValidationResult:
        """Validate complete WebSocket message structure."""
        pass

    # Internal helpers

    @staticmethod
    def _merge_field_result(label: str,
                            result: ValidationResult,
                            errors: List[str],
                            warnings: List[str]) -> None:
        """Append a field result's messages to ``errors``/``warnings``, prefixed with ``label``.

        Errors are only copied when the result is invalid; warnings are
        always copied.
        """
        if not result.is_valid:
            errors.extend(f"{label}: {error}" for error in result.errors)
        warnings.extend(f"{label}: {warning}" for warning in result.warnings)

    # Common validation methods available to all subclasses

    def validate_price(self, price: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate price value with common rules.

        A price is invalid when it is an empty/blank string, cannot be
        parsed as a decimal, is NaN or infinite, or is not strictly
        positive. Values outside ``[_min_price, _max_price]`` or with more
        than 12 decimal places only produce warnings.

        Args:
            price: Price value to validate

        Returns:
            ValidationResult with sanitized decimal price
        """
        errors = []
        warnings = []
        sanitized_data = None

        try:
            # Convert to Decimal for precise validation
            if isinstance(price, str) and price.strip() == "":
                errors.append("Empty price string")
                return ValidationResult(False, errors, warnings)

            decimal_price = Decimal(str(price))

            # NaN/Infinity parse successfully as Decimal (and come out of
            # float('nan')/float('inf')), but are never a usable price.
            # Reject them explicitly instead of relying on a later
            # comparison or exponent check happening to raise.
            if not decimal_price.is_finite():
                errors.append(f"Price must be a finite number, got {decimal_price}")
                return ValidationResult(False, errors, warnings)

            sanitized_data = decimal_price

            # Check for negative prices
            if decimal_price <= 0:
                errors.append(f"Price must be positive, got {decimal_price}")

            # Check price bounds
            if decimal_price < self._min_price:
                warnings.append(f"Price {decimal_price} below minimum {self._min_price}")
            elif decimal_price > self._max_price:
                warnings.append(f"Price {decimal_price} above maximum {self._max_price}")

            # Check for excessive decimal places (warn only)
            if decimal_price.as_tuple().exponent < -12:
                warnings.append(f"Price has excessive decimal precision: {decimal_price}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid price value: {price} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_size(self, size: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate size/quantity value with common rules.

        A size is invalid when it is an empty/blank string, cannot be
        parsed as a decimal, is NaN or infinite, or is not strictly
        positive. Values outside ``[_min_size, _max_size]`` only produce
        warnings.

        Args:
            size: Size value to validate

        Returns:
            ValidationResult with sanitized decimal size
        """
        errors = []
        warnings = []
        sanitized_data = None

        try:
            # Convert to Decimal for precise validation
            if isinstance(size, str) and size.strip() == "":
                errors.append("Empty size string")
                return ValidationResult(False, errors, warnings)

            decimal_size = Decimal(str(size))

            # Infinity would otherwise pass the positivity check and be
            # reported as valid with only an "above maximum" warning.
            if not decimal_size.is_finite():
                errors.append(f"Size must be a finite number, got {decimal_size}")
                return ValidationResult(False, errors, warnings)

            sanitized_data = decimal_size

            # Check for negative or zero sizes
            if decimal_size <= 0:
                errors.append(f"Size must be positive, got {decimal_size}")

            # Check size bounds
            if decimal_size < self._min_size:
                warnings.append(f"Size {decimal_size} below minimum {self._min_size}")
            elif decimal_size > self._max_size:
                warnings.append(f"Size {decimal_size} above maximum {self._max_size}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid size value: {size} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_volume(self, volume: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate volume value with common rules.

        Zero is allowed; negative, NaN, infinite or unparsable values are
        errors.

        Args:
            volume: Volume value to validate

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        try:
            decimal_volume = Decimal(str(volume))

            if not decimal_volume.is_finite():
                # Positive infinity would otherwise pass the sign check below.
                errors.append(f"Volume must be a finite number, got {decimal_volume}")
            elif decimal_volume < 0:
                # Volume can be zero (no trades in period)
                errors.append(f"Volume cannot be negative, got {decimal_volume}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid volume value: {volume} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_trade_side(self, side: str) -> ValidationResult:
        """
        Validate trade side with common rules.

        The comparison is case-insensitive against ``_valid_trade_sides``.

        Args:
            side: Trade side string

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        if not isinstance(side, str):
            errors.append(f"Trade side must be string, got {type(side)}")
            return ValidationResult(False, errors, warnings)

        normalized_side = side.lower()
        if normalized_side not in self._valid_trade_sides:
            errors.append(f"Invalid trade side: {side}. Must be 'buy' or 'sell'")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_timestamp(self, timestamp: Union[str, int], is_milliseconds: bool = True) -> ValidationResult:
        """
        Validate timestamp value with common rules.

        Values outside ``[_min_timestamp, _max_timestamp]`` are errors;
        values more than one year away from the current UTC time only
        produce warnings.

        Args:
            timestamp: Timestamp value to validate
            is_milliseconds: True if timestamp is in milliseconds, False for seconds

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        try:
            # Convert to int
            if isinstance(timestamp, str):
                if not timestamp.isdigit():
                    errors.append(f"Invalid timestamp format: {timestamp}")
                    return ValidationResult(False, errors, warnings)
                timestamp_int = int(timestamp)
            elif isinstance(timestamp, int):
                timestamp_int = timestamp
            else:
                errors.append(f"Timestamp must be string or int, got {type(timestamp)}")
                return ValidationResult(False, errors, warnings)

            # Convert to milliseconds if needed
            if not is_milliseconds:
                timestamp_int = timestamp_int * 1000

            # Check timestamp bounds
            if timestamp_int < self._min_timestamp:
                errors.append(f"Timestamp {timestamp_int} too old")
            elif timestamp_int > self._max_timestamp:
                errors.append(f"Timestamp {timestamp_int} too far in future")

            # Check if timestamp is reasonable (within last year to next year)
            current_time_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
            one_year_ms = 365 * 24 * 60 * 60 * 1000

            if timestamp_int < (current_time_ms - one_year_ms):
                warnings.append(f"Timestamp {timestamp_int} is older than 1 year")
            elif timestamp_int > (current_time_ms + one_year_ms):
                warnings.append(f"Timestamp {timestamp_int} is more than 1 year in future")

        except (ValueError, TypeError) as e:
            errors.append(f"Invalid timestamp: {timestamp} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_trade_id(self, trade_id: Union[str, int]) -> ValidationResult:
        """
        Validate trade ID with flexible rules.

        Integers are converted to strings. Empty/blank IDs are errors; IDs
        that do not match ``_trade_id_pattern`` only produce a warning.

        Args:
            trade_id: Trade ID to validate

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        if isinstance(trade_id, int):
            trade_id = str(trade_id)

        if not isinstance(trade_id, str):
            errors.append(f"Trade ID must be string or int, got {type(trade_id)}")
            return ValidationResult(False, errors, warnings)

        if not trade_id.strip():
            errors.append("Trade ID cannot be empty")
            return ValidationResult(False, errors, warnings)

        # Flexible validation - allow alphanumeric, underscore, hyphen
        if not self._trade_id_pattern.match(trade_id):
            warnings.append(f"Trade ID has unusual format: {trade_id}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_symbol_match(self, symbol: str, expected_symbol: Optional[str] = None) -> ValidationResult:
        """
        Validate symbol matches expected value.

        A mismatch is reported as a warning, not an error; a non-string
        symbol is an error.

        Args:
            symbol: Symbol to validate
            expected_symbol: Expected symbol value

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        if not isinstance(symbol, str):
            errors.append(f"Symbol must be string, got {type(symbol)}")
            return ValidationResult(False, errors, warnings)

        if expected_symbol and symbol != expected_symbol:
            warnings.append(f"Symbol mismatch: expected {expected_symbol}, got {symbol}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_orderbook_side(self, side_data: List[List[str]], side_name: str) -> ValidationResult:
        """
        Validate orderbook side (asks or bids) with common rules.

        Each level must be a list whose first two elements are price and
        size. Levels that fail validation are reported in ``errors`` and
        left out of the sanitized data; any elements after the first two
        are passed through unchanged.

        Args:
            side_data: List of price/size pairs
            side_name: Name of side for error messages

        Returns:
            ValidationResult with sanitized data
        """
        errors = []
        warnings = []
        sanitized_data = []

        if not isinstance(side_data, list):
            errors.append(f"{side_name} must be a list")
            return ValidationResult(False, errors, warnings)

        for i, level in enumerate(side_data):
            if not isinstance(level, list) or len(level) < 2:
                errors.append(f"{side_name}[{i}] must be a list with at least 2 elements")
                continue

            # Validate price and size
            price_result = self.validate_price(level[0])
            size_result = self.validate_size(level[1])

            # Propagate both errors and warnings so soft issues (e.g. price
            # out of the expected range) are not silently lost.
            self._merge_field_result(f"{side_name}[{i}] price", price_result, errors, warnings)
            self._merge_field_result(f"{side_name}[{i}] size", size_result, errors, warnings)

            # Add sanitized level
            if price_result.is_valid and size_result.is_valid:
                sanitized_level = [str(price_result.sanitized_data), str(size_result.sanitized_data)]
                # Include additional fields if present
                if len(level) > 2:
                    sanitized_level.extend(level[2:])
                sanitized_data.append(sanitized_level)

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_standardized_trade(self, trade: StandardizedTrade) -> DataValidationResult:
        """
        Validate a standardized trade object.

        Runs the price, size, side, trade ID, symbol-format and timestamp
        checks and collects their messages prefixed with the field name.

        Args:
            trade: StandardizedTrade object to validate

        Returns:
            DataValidationResult
        """
        errors = []
        warnings = []

        try:
            # Validate price
            self._merge_field_result("price", self.validate_price(trade.price), errors, warnings)

            # Validate size
            self._merge_field_result("size", self.validate_size(trade.size), errors, warnings)

            # Validate side
            self._merge_field_result("side", self.validate_trade_side(trade.side), errors, warnings)

            # Validate trade ID
            self._merge_field_result("trade_id", self.validate_trade_id(trade.trade_id), errors, warnings)

            # Validate symbol format (exchange-specific)
            self._merge_field_result("symbol", self.validate_symbol_format(trade.symbol), errors, warnings)

            # Validate timestamp
            timestamp_ms = int(trade.timestamp.timestamp() * 1000)
            timestamp_result = self.validate_timestamp(timestamp_ms, is_milliseconds=True)
            self._merge_field_result("timestamp", timestamp_result, errors, warnings)

            return DataValidationResult(len(errors) == 0, errors, warnings)

        except Exception as e:
            # Deliberately broad: a malformed trade object (missing
            # attribute, wrong attribute type) must yield a failed result
            # rather than propagate out of the validator.
            errors.append(f"Exception during trade validation: {str(e)}")
            return DataValidationResult(False, errors, warnings)

    def get_validator_info(self) -> Dict[str, Any]:
        """Get validator configuration information."""
        return {
            'exchange': self.exchange_name,
            'component': self.component_name,
            'limits': {
                'min_price': str(self._min_price),
                'max_price': str(self._max_price),
                'min_size': str(self._min_size),
                'max_size': str(self._max_size),
                'min_timestamp': self._min_timestamp,
                'max_timestamp': self._max_timestamp
            },
            'patterns': {
                'numeric': self._numeric_pattern.pattern,
                'trade_id': self._trade_id_pattern.pattern
            }
        }


# Utility functions for common validation patterns

def is_valid_decimal(value: Any) -> bool:
    """Check if value can be converted to a valid decimal."""
    try:
        Decimal(str(value))
        return True
    except (InvalidOperation, ValueError, TypeError):
        return False


def normalize_symbol(symbol: str, exchange: str) -> str:
    """
    Normalize symbol format for exchange.

    Args:
        symbol: Raw symbol string
        exchange: Exchange name (currently unused; kept for per-exchange extensions)

    Returns:
        Normalized symbol string
    """
    # Basic normalization - can be extended per exchange
    return symbol.upper().strip()


def validate_required_fields(data: Dict[str, Any], required_fields: List[str]) -> List[str]:
    """
    Validate that all required fields are present in data.

    A field counts as missing when its key is absent or its value is None.

    Args:
        data: Data dictionary to check
        required_fields: List of required field names

    Returns:
        List of missing field names
    """
    return [
        field for field in required_fields
        if field not in data or data[field] is None
    ]


__all__ = [
    'ValidationResult',
    'BaseDataValidator',
    'is_valid_decimal',
    'normalize_symbol',
    'validate_required_fields'
]