Add common data processing framework for OKX exchange

- Introduced a modular architecture for data processing, including common utilities for validation, transformation, and aggregation.
- Implemented `StandardizedTrade`, `OHLCVCandle`, and `TimeframeBucket` classes for unified data handling across exchanges.
- Developed `OKXDataProcessor` for OKX-specific data validation and processing, leveraging the new common framework.
- Enhanced `OKXCollector` to utilize the common data processing utilities, improving modularity and maintainability.
- Updated documentation to reflect the new architecture and provide guidance on the data processing framework.
- Created comprehensive tests for the new data processing components to ensure reliability and functionality.
This commit is contained in:
Vasily.onl
2025-05-31 21:58:47 +08:00
parent fa63e7eb2e
commit 8bb5f28fd2
15 changed files with 4015 additions and 214 deletions

52
data/common/__init__.py Normal file
View File

@@ -0,0 +1,52 @@
"""
Common data processing utilities for all exchanges.
This package contains shared components for data validation, transformation,
and aggregation that can be used across different exchange implementations.
"""
from .data_types import (
StandardizedTrade,
OHLCVCandle,
MarketDataPoint,
DataValidationResult
)
from .aggregation import (
TimeframeBucket,
RealTimeCandleProcessor,
CandleProcessingConfig
)
from .transformation import (
BaseDataTransformer,
UnifiedDataTransformer,
create_standardized_trade
)
from .validation import (
BaseDataValidator,
ValidationResult
)
# Names exported by `from data.common import *`, grouped by the submodule
# each one is imported from above.
__all__ = [
# Data types
'StandardizedTrade',
'OHLCVCandle',
'MarketDataPoint',
'DataValidationResult',
# Aggregation
'TimeframeBucket',
'RealTimeCandleProcessor',
'CandleProcessingConfig',
# Transformation
'BaseDataTransformer',
'UnifiedDataTransformer',
'create_standardized_trade',
# Validation
'BaseDataValidator',
'ValidationResult'
]

553
data/common/aggregation.py Normal file
View File

@@ -0,0 +1,553 @@
"""
Common aggregation utilities for all exchanges.
This module provides shared functionality for building OHLCV candles
from trade data, regardless of the source exchange.
AGGREGATION STRATEGY:
- Uses RIGHT-ALIGNED timestamps (industry standard)
- Candle timestamp = end time of the interval (close time)
- 5-minute candle with timestamp 09:05:00 represents trades with 09:00:00 <= trade time < 09:05:00
- Prevents future leakage by only completing candles when time boundary is crossed
- Aligns with major exchanges (Binance, OKX, Coinbase)
PROCESS FLOW:
1. Trade arrives with timestamp T
2. Calculate which time bucket this trade belongs to
3. If bucket doesn't exist or time boundary crossed, complete previous bucket
4. Add trade to current bucket
5. Only emit completed candles (never future data)
"""
from datetime import datetime, timezone, timedelta
from decimal import Decimal
from typing import Dict, List, Optional, Any, Iterator, Callable
from collections import defaultdict
from .data_types import (
StandardizedTrade,
OHLCVCandle,
CandleProcessingConfig,
ProcessingStats
)
from utils.logger import get_logger
class TimeframeBucket:
    """
    Accumulator that folds the trades of one time window into OHLCV values.

    The window is half-open: a trade is accepted when
    ``start_time <= trade.timestamp < end_time``. ``end_time`` is derived
    from ``start_time`` and the timeframe, and both boundaries are handed to
    the candle built by ``to_candle``.
    Example: a '5m' bucket starting at 09:00:00 has end_time 09:05:00.
    """

    def __init__(self, symbol: str, timeframe: str, start_time: datetime, exchange: str = "unknown"):
        """
        Create an empty bucket.

        Args:
            symbol: Trading symbol (e.g., 'BTC-USDT')
            timeframe: Time period ('1m', '5m', '15m', '30m', '1h', '4h' or '1d')
            start_time: Inclusive start of the window
            exchange: Exchange name

        Raises:
            ValueError: If the timeframe is not supported
        """
        self.symbol = symbol
        self.timeframe = timeframe
        self.start_time = start_time
        self.end_time = self._calculate_end_time(start_time, timeframe)
        self.exchange = exchange

        # Running OHLCV values; prices stay None until the first trade arrives.
        self.open: Optional[Decimal] = None
        self.high: Optional[Decimal] = None
        self.low: Optional[Decimal] = None
        self.close: Optional[Decimal] = None
        self.volume: Decimal = Decimal('0')
        self.trade_count: int = 0

        # Bookkeeping about the accepted trades.
        self.first_trade_time: Optional[datetime] = None
        self.last_trade_time: Optional[datetime] = None
        self.trades: List[StandardizedTrade] = []

    def add_trade(self, trade: StandardizedTrade) -> bool:
        """
        Fold a trade into the bucket when its timestamp is inside the window.

        Args:
            trade: Standardized trade data

        Returns:
            True if the trade was accepted, False if it lies outside
            ``[start_time, end_time)``
        """
        inside_window = self.start_time <= trade.timestamp < self.end_time
        if not inside_window:
            return False

        price = trade.price
        if self.open is None:
            # The very first accepted trade seeds open/high/low.
            self.open = price
            self.high = price
            self.low = price
            self.first_trade_time = trade.timestamp

        self.high = max(self.high, price)
        self.low = min(self.low, price)
        self.close = price  # the most recently added trade defines the close
        self.volume += trade.size
        self.trade_count += 1
        self.last_trade_time = trade.timestamp

        # Every accepted trade is retained on the bucket.
        self.trades.append(trade)
        return True

    def to_candle(self, is_complete: bool = True) -> OHLCVCandle:
        """
        Build an OHLCVCandle snapshot of the bucket's current state.

        Prices that are still unset (no trades yet) are reported as Decimal('0').

        Args:
            is_complete: Value stored in the candle's ``is_complete`` field

        Returns:
            Candle carrying this bucket's boundaries and OHLCV values
        """
        zero = Decimal('0')
        open_price = self.open or zero
        high_price = self.high or zero
        low_price = self.low or zero
        close_price = self.close or zero
        return OHLCVCandle(
            symbol=self.symbol,
            timeframe=self.timeframe,
            start_time=self.start_time,
            end_time=self.end_time,
            open=open_price,
            high=high_price,
            low=low_price,
            close=close_price,
            volume=self.volume,
            trade_count=self.trade_count,
            exchange=self.exchange,
            is_complete=is_complete,
            first_trade_time=self.first_trade_time,
            last_trade_time=self.last_trade_time,
        )

    def _calculate_end_time(self, start_time: datetime, timeframe: str) -> datetime:
        """
        Return the exclusive end of the window that begins at start_time.

        Raises:
            ValueError: If the timeframe is not supported
        """
        spans = (
            ('1m', timedelta(minutes=1)),
            ('5m', timedelta(minutes=5)),
            ('15m', timedelta(minutes=15)),
            ('30m', timedelta(minutes=30)),
            ('1h', timedelta(hours=1)),
            ('4h', timedelta(hours=4)),
            ('1d', timedelta(days=1)),
        )
        for label, span in spans:
            if timeframe == label:
                return start_time + span
        raise ValueError(f"Unsupported timeframe: {timeframe}")
class RealTimeCandleProcessor:
    """
    Incremental candle builder for a single symbol on a single exchange.

    Each incoming trade is routed to one bucket per configured timeframe.
    A bucket is turned into a completed candle only when a trade arrives
    whose bucket start differs from the current bucket's start, so candles
    returned by ``process_trade`` are never still-open buckets.

    TIMESTAMP ALIGNMENT:
    - Bucket boundaries are computed in UTC.
    - A bucket covers start_time <= t < end_time; end_time is the
      right-aligned candle timestamp (e.g. 09:00:00-09:05:00 -> 09:05:00).
    """

    def __init__(self,
                 symbol: str,
                 exchange: str,
                 config: Optional[CandleProcessingConfig] = None,
                 component_name: str = "realtime_candle_processor"):
        """
        Initialize real-time candle processor.

        Args:
            symbol: Trading symbol (e.g., 'BTC-USDT')
            exchange: Exchange name (e.g., 'okx', 'binance')
            config: Processing configuration; a default config is used when omitted
            component_name: Name for logging
        """
        self.symbol = symbol
        self.exchange = exchange
        self.config = config or CandleProcessingConfig()
        self.component_name = component_name
        self.logger = get_logger(self.component_name)

        # Open bucket per timeframe, keyed by timeframe string.
        self.current_buckets: Dict[str, TimeframeBucket] = {}

        # Callables invoked with every completed candle.
        self.candle_callbacks: List[Callable[[OHLCVCandle], None]] = []

        self.stats = ProcessingStats(active_timeframes=len(self.config.timeframes))
        self.logger.info(f"Initialized real-time candle processor for {symbol} on {exchange} with timeframes: {self.config.timeframes}")

    def add_candle_callback(self, callback: Callable[[OHLCVCandle], None]) -> None:
        """Register a callable that receives every completed candle."""
        self.candle_callbacks.append(callback)
        self.logger.debug(f"Added candle callback: {callback.__name__ if hasattr(callback, '__name__') else str(callback)}")

    def process_trade(self, trade: StandardizedTrade) -> List[OHLCVCandle]:
        """
        Process a single trade for every configured timeframe.

        Completed candles are also passed to the registered callbacks.
        Any exception is logged and counted, and an empty list is returned.

        Args:
            trade: Standardized trade data

        Returns:
            Candles completed by this trade (empty when no boundary was crossed)
        """
        try:
            completed_candles = []
            for timeframe in self.config.timeframes:
                candle = self._process_trade_for_timeframe(trade, timeframe)
                if candle:
                    completed_candles.append(candle)

            self.stats.trades_processed += 1
            self.stats.last_trade_time = trade.timestamp

            for candle in completed_candles:
                self._emit_candle(candle)
            return completed_candles
        except Exception as e:
            self.logger.error(f"Error processing trade for {self.symbol}: {e}")
            self.stats.errors_count += 1
            return []

    def _process_trade_for_timeframe(self, trade: StandardizedTrade, timeframe: str) -> Optional[OHLCVCandle]:
        """
        Route a trade into the bucket of one timeframe.

        Steps:
        1. Compute the bucket start the trade belongs to.
        2. If no bucket exists yet, create one.
        3. If the current bucket has a different start, turn it into a
           completed candle (only when it holds trades) and open a new bucket.
        4. Add the trade to the current bucket.

        NOTE(review): step 3 fires on ANY start mismatch, so a trade that is
        older than the current bucket also closes it - confirm that callers
        deliver trades in timestamp order.

        Returns:
            The candle completed by this trade, or None
        """
        try:
            trade_bucket_start = self._get_bucket_start_time(trade.timestamp, timeframe)
            current_bucket = self.current_buckets.get(timeframe)
            completed_candle = None

            if current_bucket is None:
                # First bucket for this timeframe.
                current_bucket = TimeframeBucket(self.symbol, timeframe, trade_bucket_start, self.exchange)
                self.current_buckets[timeframe] = current_bucket
            elif current_bucket.start_time != trade_bucket_start:
                # Boundary crossed: close the previous bucket if it has trades.
                if current_bucket.trade_count > 0:
                    completed_candle = current_bucket.to_candle(is_complete=True)
                    self.stats.candles_emitted += 1
                    self.stats.last_candle_time = completed_candle.end_time
                current_bucket = TimeframeBucket(self.symbol, timeframe, trade_bucket_start, self.exchange)
                self.current_buckets[timeframe] = current_bucket

            if not current_bucket.add_trade(trade):
                # Not expected: the bucket was derived from this trade's timestamp.
                self.logger.warning(f"Trade {trade.timestamp} could not be added to bucket {current_bucket.start_time}-{current_bucket.end_time}")
            return completed_candle
        except Exception as e:
            self.logger.error(f"Error processing trade for timeframe {timeframe}: {e}")
            self.stats.errors_count += 1
            return None

    def _get_bucket_start_time(self, timestamp: datetime, timeframe: str) -> datetime:
        """
        Calculate the start (left boundary) of the bucket containing timestamp.

        Timezone-aware timestamps are converted to UTC first so that bucket
        boundaries do not depend on the timestamp's UTC offset. Naive
        timestamps are used as given.

        EXAMPLES:
        - Trade at 09:03:45 for 5m timeframe -> bucket start = 09:00:00
        - Trade at 09:07:23 for 5m timeframe -> bucket start = 09:05:00
        - Trade at 14:00:00 for 1h timeframe -> bucket start = 14:00:00

        Args:
            timestamp: Trade timestamp
            timeframe: Target timeframe

        Returns:
            Bucket start time (left boundary)

        Raises:
            ValueError: If the timeframe is not supported
        """
        # Align on UTC wall-clock fields; without this an offset such as
        # +05:30 would shift hour/4h/day boundaries away from UTC.
        if timestamp.utcoffset() is not None:
            timestamp = timestamp.astimezone(timezone.utc)
        dt = timestamp.replace(second=0, microsecond=0)

        if timeframe == '1m':
            return dt
        elif timeframe == '5m':
            # 5-minute buckets: 00:00, 00:05, 00:10, etc.
            return dt.replace(minute=(dt.minute // 5) * 5)
        elif timeframe == '15m':
            # 15-minute buckets: 00:00, 00:15, 00:30, 00:45
            return dt.replace(minute=(dt.minute // 15) * 15)
        elif timeframe == '30m':
            # 30-minute buckets: 00:00, 00:30
            return dt.replace(minute=(dt.minute // 30) * 30)
        elif timeframe == '1h':
            return dt.replace(minute=0)
        elif timeframe == '4h':
            # 4-hour buckets: 00:00, 04:00, 08:00, 12:00, 16:00, 20:00
            return dt.replace(minute=0, hour=(dt.hour // 4) * 4)
        elif timeframe == '1d':
            # 1-day buckets start at midnight
            return dt.replace(minute=0, hour=0)
        else:
            raise ValueError(f"Unsupported timeframe: {timeframe}")

    def _emit_candle(self, candle: OHLCVCandle) -> None:
        """
        Pass a completed candle to every registered callback.

        Each callback is guarded individually so that one failing callback
        is logged and counted without preventing the remaining callbacks
        from receiving the candle.
        """
        for callback in self.candle_callbacks:
            try:
                callback(candle)
            except Exception as e:
                self.logger.error(f"Error in candle callback: {e}")
                self.stats.errors_count += 1

    def get_current_candles(self, incomplete: bool = True) -> List[OHLCVCandle]:
        """
        Get snapshots of the open buckets as incomplete candles.

        WARNING: These candles are still being built (is_complete=False).

        Args:
            incomplete: Currently not consulted by the implementation

        Returns:
            One candle per bucket that holds at least one trade
        """
        candles = []
        for bucket in self.current_buckets.values():
            if bucket.trade_count > 0:
                candles.append(bucket.to_candle(is_complete=False))
        return candles

    def force_complete_all_candles(self) -> List[OHLCVCandle]:
        """
        Close every open bucket and emit it as a completed candle.

        WARNING: The emitted candles are marked complete even though their
        time window may not have ended. Intended for shutdown/batch use.

        Buckets without trades are dropped silently. Statistics are updated
        the same way as for candles completed by a boundary crossing.

        Returns:
            The candles that were emitted
        """
        completed_candles = []
        for bucket in self.current_buckets.values():
            if bucket.trade_count > 0:
                candle = bucket.to_candle(is_complete=True)
                completed_candles.append(candle)
                self.stats.candles_emitted += 1
                self.stats.last_candle_time = candle.end_time
                self._emit_candle(candle)

        self.current_buckets.clear()
        return completed_candles

    def get_stats(self) -> Dict[str, Any]:
        """Get processing statistics plus the trade count of each open bucket."""
        stats_dict = self.stats.to_dict()
        stats_dict['current_buckets'] = {
            tf: bucket.trade_count for tf, bucket in self.current_buckets.items()
        }
        return stats_dict
class BatchCandleProcessor:
    """
    Candle builder for a finite stream of trades.

    Every call to ``process_trades_to_candles`` feeds the trades through a
    freshly created RealTimeCandleProcessor and then force-completes the
    buckets that are still open, so all timeframes are built in one pass.
    """

    def __init__(self,
                 symbol: str,
                 exchange: str,
                 timeframes: List[str],
                 component_name: str = "batch_candle_processor"):
        """
        Initialize batch candle processor.

        Args:
            symbol: Trading symbol
            exchange: Exchange name
            timeframes: List of timeframes to process
            component_name: Name for logging
        """
        self.symbol = symbol
        self.exchange = exchange
        self.timeframes = timeframes
        self.component_name = component_name
        self.logger = get_logger(self.component_name)

        # Counters exposed through get_stats().
        self.stats = ProcessingStats(active_timeframes=len(timeframes))
        self.logger.info(f"Initialized batch candle processor for {symbol} on {exchange}")

    def process_trades_to_candles(self, trades: Iterator[StandardizedTrade]) -> List[OHLCVCandle]:
        """
        Consume an iterator of trades and return the resulting candles.

        Candles completed while iterating come first, followed by the
        candles obtained by force-completing the remaining open buckets.
        Any exception is logged and counted, and an empty list is returned.

        Args:
            trades: Iterator of standardized trades

        Returns:
            List of candles built from the trades
        """
        try:
            batch_config = CandleProcessingConfig(timeframes=self.timeframes, auto_save_candles=False)
            worker = RealTimeCandleProcessor(
                self.symbol,
                self.exchange,
                batch_config,
                f"batch_processor_{self.symbol}_{self.exchange}",
            )

            collected = []
            for item in trades:
                collected += worker.process_trade(item)
                self.stats.trades_processed += 1

            # Buckets still open at the end of the stream are closed explicitly.
            collected += worker.force_complete_all_candles()

            candle_total = len(collected)
            self.stats.candles_emitted = candle_total
            if collected:
                self.stats.last_candle_time = max(c.end_time for c in collected)

            self.logger.info(f"Batch processed {self.stats.trades_processed} trades to {candle_total} candles")
            return collected
        except Exception as exc:
            self.logger.error(f"Error in batch processing trades to candles: {exc}")
            self.stats.errors_count += 1
            return []

    def get_stats(self) -> Dict[str, Any]:
        """Return the processing statistics as a dictionary."""
        snapshot = self.stats.to_dict()
        return snapshot
# Utility functions for common aggregation operations
def aggregate_trades_to_candles(trades: List[StandardizedTrade],
                                timeframes: List[str],
                                symbol: str,
                                exchange: str) -> List[OHLCVCandle]:
    """
    Convenience wrapper: aggregate a list of trades with a one-off BatchCandleProcessor.

    Args:
        trades: List of standardized trades
        timeframes: List of timeframes to generate
        symbol: Trading symbol
        exchange: Exchange name

    Returns:
        List of candles produced by the batch processor
    """
    batch = BatchCandleProcessor(symbol, exchange, timeframes)
    trade_stream = iter(trades)
    return batch.process_trades_to_candles(trade_stream)
def validate_timeframe(timeframe: str) -> bool:
    """
    Tell whether a timeframe string is one of the supported values.

    Args:
        timeframe: Timeframe string (e.g., '1m', '5m', '1h')

    Returns:
        True for '1m', '5m', '15m', '30m', '1h', '4h' and '1d', False otherwise
    """
    return timeframe in ('1m', '5m', '15m', '30m', '1h', '4h', '1d')
def parse_timeframe(timeframe: str) -> tuple[int, str]:
    """
    Parse timeframe string into number and unit.

    Matching is case-insensitive (the input is lower-cased first) and the
    whole string must match, so trailing characters such as a newline are
    rejected.

    Args:
        timeframe: Timeframe string (e.g., '5m', '1h')

    Returns:
        Tuple of (number, unit) where unit is 'm', 'h' or 'd'

    Raises:
        ValueError: If the string is not <digits><m|h|d>

    Examples:
        '5m' -> (5, 'm')
        '1h' -> (1, 'h')
        '1d' -> (1, 'd')
    """
    import re

    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which would let '5m\n' through.
    match = re.fullmatch(r'(\d+)([mhd])', timeframe.lower())
    if match is None:
        raise ValueError(f"Invalid timeframe format: {timeframe}")
    return int(match.group(1)), match.group(2)
# Public names of the aggregation module: the bucket and processor classes
# followed by the module-level helper functions.
__all__ = [
'TimeframeBucket',
'RealTimeCandleProcessor',
'BatchCandleProcessor',
'aggregate_trades_to_candles',
'validate_timeframe',
'parse_timeframe'
]

182
data/common/data_types.py Normal file
View File

@@ -0,0 +1,182 @@
"""
Common data types for all exchange implementations.
These data structures provide a unified interface for market data
regardless of the source exchange.
"""
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, field
from enum import Enum
from ..base_collector import DataType, MarketDataPoint # Import from base
@dataclass
class DataValidationResult:
    """Outcome of a data validation run; shared by all exchanges."""

    # Overall verdict of the validation.
    is_valid: bool
    # Messages collected as errors.
    errors: List[str]
    # Messages collected as warnings.
    warnings: List[str]
    # Optional cleaned-up copy of the validated data; None when not provided.
    sanitized_data: Optional[Dict[str, Any]] = None
@dataclass
class StandardizedTrade:
    """
    Exchange-independent representation of a single trade.

    After construction the timestamp is always timezone-aware (naive values
    are tagged as UTC) and ``side`` is lower-cased and guaranteed to be
    either 'buy' or 'sell'.
    """

    symbol: str
    trade_id: str
    price: Decimal
    size: Decimal
    side: str  # 'buy' or 'sell'
    timestamp: datetime
    exchange: str
    raw_data: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        """
        Normalize timestamp and side right after field assignment.

        Raises:
            ValueError: If side is neither 'buy' nor 'sell' (case-insensitive)
        """
        stamp = self.timestamp
        if stamp.tzinfo is None:
            # Naive timestamps are tagged as UTC (no clock shift is applied).
            self.timestamp = stamp.replace(tzinfo=timezone.utc)

        side = self.side.lower()
        self.side = side
        if side not in ('buy', 'sell'):
            raise ValueError(f"Invalid trade side: {side}")
@dataclass
class OHLCVCandle:
    """
    OHLCV candle for one symbol, timeframe and time window.

    Construction tags naive start/end times as UTC and rejects candles whose
    high is below the low or whose prices, volume or trade count are negative.
    """

    symbol: str
    timeframe: str
    start_time: datetime
    end_time: datetime
    open: Decimal
    high: Decimal
    low: Decimal
    close: Decimal
    volume: Decimal
    trade_count: int
    exchange: str = "unknown"
    is_complete: bool = False
    first_trade_time: Optional[datetime] = None
    last_trade_time: Optional[datetime] = None

    def __post_init__(self):
        """
        Normalize the window boundaries and validate the numeric fields.

        Raises:
            ValueError: If high < low, or any price, the volume or the
                trade count is negative
        """
        # Naive boundaries are tagged as UTC (no clock shift is applied).
        for attr in ('start_time', 'end_time'):
            moment = getattr(self, attr)
            if moment.tzinfo is None:
                setattr(self, attr, moment.replace(tzinfo=timezone.utc))

        if self.high < self.low:
            raise ValueError("High price cannot be less than low price")
        if any(price < 0 for price in (self.open, self.high, self.low, self.close)):
            raise ValueError("Prices cannot be negative")
        if self.volume < 0:
            raise ValueError("Volume cannot be negative")
        if self.trade_count < 0:
            raise ValueError("Trade count cannot be negative")

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert candle to dictionary for storage/serialization.

        Datetimes become ISO-8601 strings (or None when unset) and Decimal
        values become strings.
        """
        def _iso_or_none(moment):
            return moment.isoformat() if moment else None

        payload = {
            'symbol': self.symbol,
            'timeframe': self.timeframe,
            'start_time': self.start_time.isoformat(),
            'end_time': self.end_time.isoformat(),
        }
        for name in ('open', 'high', 'low', 'close', 'volume'):
            payload[name] = str(getattr(self, name))
        payload['trade_count'] = self.trade_count
        payload['exchange'] = self.exchange
        payload['is_complete'] = self.is_complete
        payload['first_trade_time'] = _iso_or_none(self.first_trade_time)
        payload['last_trade_time'] = _iso_or_none(self.last_trade_time)
        return payload
@dataclass
class CandleProcessingConfig:
    """Candle processing settings shared by all exchanges."""

    # Timeframes to build; validated in __post_init__.
    timeframes: List[str] = field(default_factory=lambda: ['1m', '5m', '15m', '1h'])
    auto_save_candles: bool = True
    emit_incomplete_candles: bool = False
    max_trades_per_candle: int = 100000  # Safety limit

    def __post_init__(self):
        """
        Reject configurations that name an unsupported timeframe.

        Raises:
            ValueError: For the first timeframe that is not one of
                '1m', '5m', '15m', '30m', '1h', '4h', '1d'
        """
        allowed = ('1m', '5m', '15m', '30m', '1h', '4h', '1d')
        for candidate in self.timeframes:
            if candidate in allowed:
                continue
            raise ValueError(f"Unsupported timeframe: {candidate}")
class TradeSide(Enum):
    """Closed set of trade sides; values are the lower-case side strings."""

    BUY = "buy"
    SELL = "sell"
class TimeframeUnit(Enum):
    """Unit suffixes used in timeframe strings ('m', 'h', 'd')."""

    MINUTE = "m"
    HOUR = "h"
    DAY = "d"
@dataclass
class ProcessingStats:
    """Counters and timestamps describing the work done by a processor."""

    trades_processed: int = 0
    candles_emitted: int = 0
    errors_count: int = 0
    warnings_count: int = 0
    last_trade_time: Optional[datetime] = None
    last_candle_time: Optional[datetime] = None
    active_timeframes: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Return the stats as a dict; datetimes become ISO-8601 strings or None."""
        def _iso_or_none(moment):
            return moment.isoformat() if moment else None

        snapshot = {}
        for counter in ('trades_processed', 'candles_emitted', 'errors_count', 'warnings_count'):
            snapshot[counter] = getattr(self, counter)
        snapshot['last_trade_time'] = _iso_or_none(self.last_trade_time)
        snapshot['last_candle_time'] = _iso_or_none(self.last_candle_time)
        snapshot['active_timeframes'] = self.active_timeframes
        return snapshot
# Public names of this module. DataType and MarketDataPoint are not defined
# here; they are imported from base_collector and re-exported for convenience.
__all__ = [
'DataType',
'MarketDataPoint',
'DataValidationResult',
'StandardizedTrade',
'OHLCVCandle',
'CandleProcessingConfig',
'TradeSide',
'TimeframeUnit',
'ProcessingStats'
]

View File

@@ -0,0 +1,471 @@
"""
Base transformation utilities for all exchanges.
This module provides common transformation patterns and base classes
for converting exchange-specific data to standardized formats.
"""
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, List, Optional, Any, Iterator
from abc import ABC, abstractmethod
from .data_types import StandardizedTrade, OHLCVCandle, DataValidationResult
from .aggregation import BatchCandleProcessor
from utils.logger import get_logger
class BaseDataTransformer(ABC):
    """
    Abstract base class for exchange data transformers.

    Subclasses implement the three ``transform_*_data`` methods; the
    remaining methods are shared helpers for timestamps, decimals, trade
    sides and symbols.
    """

    def __init__(self,
                 exchange_name: str,
                 component_name: str = "base_data_transformer"):
        """
        Initialize base data transformer.

        Args:
            exchange_name: Name of the exchange (e.g., 'okx', 'binance')
            component_name: Name for logging
        """
        self.exchange_name = exchange_name
        self.component_name = component_name
        self.logger = get_logger(self.component_name)
        self.logger.info(f"Initialized base data transformer for {exchange_name}")

    # Abstract methods that must be implemented by subclasses

    @abstractmethod
    def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[StandardizedTrade]:
        """Transform exchange-specific trade data to standardized format."""
        pass

    @abstractmethod
    def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
        """Transform exchange-specific orderbook data to standardized format."""
        pass

    @abstractmethod
    def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
        """Transform exchange-specific ticker data to standardized format."""
        pass

    # Common transformation utilities available to all subclasses

    def timestamp_to_datetime(self, timestamp: Any, is_milliseconds: bool = True) -> datetime:
        """
        Convert a numeric or numeric-string timestamp to a UTC datetime.

        Args:
            timestamp: Epoch timestamp as str, int or float
            is_milliseconds: True if timestamp is in milliseconds

        Returns:
            Timezone-aware (UTC) datetime. NOTE: when the value cannot be
            converted, the error is logged and the CURRENT time is returned
            instead of raising.
        """
        try:
            if not isinstance(timestamp, (str, int, float)):
                raise ValueError(f"Invalid timestamp type: {type(timestamp)}")
            timestamp_num = float(timestamp)

            if is_milliseconds:
                timestamp_num = timestamp_num / 1000

            return datetime.fromtimestamp(timestamp_num, tz=timezone.utc)
        except Exception as e:
            self.logger.error(f"Error converting timestamp {timestamp}: {e}")
            # Deliberate best-effort fallback kept from the original behavior.
            return datetime.now(timezone.utc)

    def safe_decimal_conversion(self, value: Any, field_name: str = "value") -> Optional[Decimal]:
        """
        Safely convert value to Decimal with error handling.

        Args:
            value: Value to convert
            field_name: Name of field for error logging

        Returns:
            Decimal value, or None when the value is None, empty, not
            parseable, or not a finite number (NaN / Infinity)
        """
        try:
            if value is None or value == "":
                return None
            converted = Decimal(str(value))
        except Exception as e:
            self.logger.warning(f"Failed to convert {field_name} '{value}' to Decimal: {e}")
            return None

        # Decimal() happily parses 'NaN' and 'Infinity'; such values are not
        # usable numbers and would break later ordering comparisons.
        if not converted.is_finite():
            self.logger.warning(f"Failed to convert {field_name} '{value}' to Decimal: non-finite value")
            return None
        return converted

    def normalize_trade_side(self, side: str) -> str:
        """
        Normalize trade side to standard format.

        Args:
            side: Raw trade side string

        Returns:
            Normalized side ('buy' or 'sell'); unknown values are logged
            and mapped to 'buy'
        """
        normalized = side.lower().strip()

        # Handle common variations
        if normalized in ['buy', 'bid', 'b', '1']:
            return 'buy'
        elif normalized in ['sell', 'ask', 's', '0']:
            return 'sell'
        else:
            self.logger.warning(f"Unknown trade side: {side}, defaulting to 'buy'")
            return 'buy'

    def validate_symbol_format(self, symbol: str) -> str:
        """
        Validate and normalize symbol format.

        Args:
            symbol: Raw symbol string

        Returns:
            Upper-cased symbol with surrounding whitespace removed

        Raises:
            ValueError: If symbol is empty, not a string, or blank
        """
        if not symbol or not isinstance(symbol, str):
            raise ValueError(f"Invalid symbol: {symbol}")

        normalized = symbol.upper().strip()
        if not normalized:
            raise ValueError("Empty symbol after normalization")
        return normalized

    def transform_database_record(self, record: Any) -> Optional[StandardizedTrade]:
        """
        Transform database record to standardized format.

        This base implementation only logs a warning; subclasses override
        it to handle their specific database schema.

        Args:
            record: Database record

        Returns:
            StandardizedTrade or None if transformation failed
        """
        self.logger.warning("transform_database_record not implemented for this exchange")
        return None

    def get_transformer_info(self) -> Dict[str, Any]:
        """
        Get transformer information.

        ``database_transformation`` is True only when the concrete class
        overrides ``transform_database_record``; the base stub does not count.
        """
        # hasattr() would always be True here because this base class
        # defines the stub, so compare against the base implementation.
        overrides_db_transform = (
            getattr(type(self), 'transform_database_record', None)
            is not BaseDataTransformer.transform_database_record
        )
        return {
            'exchange': self.exchange_name,
            'component': self.component_name,
            'capabilities': {
                'trade_transformation': True,
                'orderbook_transformation': True,
                'ticker_transformation': True,
                'database_transformation': overrides_db_transform
            }
        }
class UnifiedDataTransformer:
"""
Unified data transformation system for all scenarios.
This class provides a common interface for transforming data from
various sources (real-time, historical, backfill) into standardized
formats for further processing.
TRANSFORMATION PROCESS:
1. Raw Data Input (exchange format, database records, etc.)
2. Validation (using exchange-specific validators)
3. Transformation to StandardizedTrade format
4. Optional aggregation to candles
5. Output in consistent format
"""
def __init__(self,
exchange_transformer: BaseDataTransformer,
component_name: str = "unified_data_transformer"):
"""
Initialize unified data transformer.
Args:
exchange_transformer: Exchange-specific transformer instance
component_name: Name for logging
"""
self.exchange_transformer = exchange_transformer
self.component_name = component_name
self.logger = get_logger(self.component_name)
self.logger.info(f"Initialized unified data transformer with {exchange_transformer.exchange_name} transformer")
def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[StandardizedTrade]:
"""
Transform trade data using exchange-specific transformer.
Args:
raw_data: Raw trade data from exchange
symbol: Trading symbol
Returns:
Standardized trade or None if transformation failed
"""
try:
return self.exchange_transformer.transform_trade_data(raw_data, symbol)
except Exception as e:
self.logger.error(f"Error in trade transformation: {e}")
return None
def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
"""
Transform orderbook data using exchange-specific transformer.
Args:
raw_data: Raw orderbook data from exchange
symbol: Trading symbol
Returns:
Standardized orderbook data or None if transformation failed
"""
try:
return self.exchange_transformer.transform_orderbook_data(raw_data, symbol)
except Exception as e:
self.logger.error(f"Error in orderbook transformation: {e}")
return None
def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
"""
Transform ticker data using exchange-specific transformer.
Args:
raw_data: Raw ticker data from exchange
symbol: Trading symbol
Returns:
Standardized ticker data or None if transformation failed
"""
try:
return self.exchange_transformer.transform_ticker_data(raw_data, symbol)
except Exception as e:
self.logger.error(f"Error in ticker transformation: {e}")
return None
def process_trades_to_candles(self,
trades: Iterator[StandardizedTrade],
timeframes: List[str],
symbol: str) -> List[OHLCVCandle]:
"""
Process any trade iterator to candles - unified processing function.
This function handles ALL scenarios:
- Real-time: Single trade iterators
- Historical: Batch trade iterators
- Backfill: API trade iterators
Args:
trades: Iterator of standardized trades
timeframes: List of timeframes to generate
symbol: Trading symbol
Returns:
List of completed candles
"""
try:
processor = BatchCandleProcessor(
symbol,
self.exchange_transformer.exchange_name,
timeframes,
f"unified_batch_processor_{symbol}"
)
candles = processor.process_trades_to_candles(trades)
self.logger.info(f"Processed {processor.get_stats()['trades_processed']} trades to {len(candles)} candles")
return candles
except Exception as e:
self.logger.error(f"Error processing trades to candles: {e}")
return []
def batch_transform_trades(self,
                           raw_trades: List[Dict[str, Any]],
                           symbol: str) -> List[StandardizedTrade]:
    """
    Run ``transform_trade_data`` over a list of raw trade records.

    Records whose transformation raises or yields a falsy result are
    counted as errors and skipped; a summary line is logged at the end.

    Args:
        raw_trades: List of raw trade data
        symbol: Trading symbol

    Returns:
        List of successfully transformed trades, in input order
    """
    converted = []
    failed = 0

    for record in raw_trades:
        try:
            result = self.transform_trade_data(record, symbol)
            if not result:
                failed += 1
                continue
            converted.append(result)
        except Exception as e:
            self.logger.error(f"Error transforming trade: {e}")
            failed += 1

    self.logger.info(f"Batch transformed {len(converted)} trades successfully, {failed} errors")
    return converted
def get_transformer_info(self) -> Dict[str, Any]:
    """
    Get comprehensive transformer information.

    Returns:
        The dict returned by the exchange transformer's own
        ``get_transformer_info`` (updated in place, not copied), extended
        with this component's name and its batch/candle capability flags
    """
    info = self.exchange_transformer.get_transformer_info()
    info.update(
        unified_component=self.component_name,
        batch_processing=True,
        candle_aggregation=True,
    )
    return info
# Utility functions for common transformation patterns
def create_standardized_trade(symbol: str,
                              trade_id: str,
                              price: Any,
                              size: Any,
                              side: str,
                              timestamp: Any,
                              exchange: str,
                              raw_data: Optional[Dict[str, Any]] = None,
                              is_milliseconds: bool = True) -> StandardizedTrade:
    """
    Utility function to create StandardizedTrade with proper validation.

    Every rejection is reported as ValueError so callers have a single
    exception type to handle; the underlying error, when there is one, is
    kept as ``__cause__``.

    Args:
        symbol: Trading symbol (upper-cased and stripped)
        trade_id: Trade identifier (stored as ``str(trade_id)``)
        price: Trade price (anything whose ``str()`` parses as a finite Decimal)
        size: Trade size (anything whose ``str()`` parses as a finite Decimal)
        side: Trade side ('buy' or 'sell', case-insensitive, surrounding
            whitespace ignored)
        timestamp: Epoch number or numeric string, or a datetime. A naive
            datetime is tagged as UTC; an aware datetime is kept as given.
        exchange: Exchange name (lower-cased)
        raw_data: Original raw data
        is_milliseconds: True if a numeric timestamp is in milliseconds;
            has no effect when a datetime is passed

    Returns:
        StandardizedTrade object

    Raises:
        ValueError: If data is invalid
    """
    # Convert timestamp
    if isinstance(timestamp, datetime):
        dt = timestamp if timestamp.tzinfo is not None else timestamp.replace(tzinfo=timezone.utc)
    elif isinstance(timestamp, (int, float, str)):
        try:
            seconds = float(timestamp)
            if is_milliseconds:
                seconds = seconds / 1000
            # fromtimestamp raises OverflowError/OSError (not ValueError) for
            # infinite or platform-unsupported values; fold those into the
            # documented ValueError.
            dt = datetime.fromtimestamp(seconds, tz=timezone.utc)
        except (ValueError, OverflowError, OSError) as e:
            raise ValueError(f"Invalid timestamp value: {timestamp!r} - {e}") from e
    else:
        raise ValueError(f"Invalid timestamp type: {type(timestamp)}")

    # Convert price and size to Decimal
    try:
        decimal_price = Decimal(str(price))
        decimal_size = Decimal(str(size))
    except Exception as e:
        raise ValueError(f"Invalid price or size: {e}") from e

    # Decimal happily parses "NaN" and "Infinity"; neither is a usable
    # trade value, and NaN breaks later ordering comparisons.
    if not (decimal_price.is_finite() and decimal_size.is_finite()):
        raise ValueError(
            f"Invalid price or size: non-finite value (price={price!r}, size={size!r})"
        )

    # Normalize side
    if not isinstance(side, str):
        raise ValueError(f"Invalid trade side: {side}")
    normalized_side = side.lower().strip()
    if normalized_side not in ('buy', 'sell'):
        raise ValueError(f"Invalid trade side: {side}")

    # Guard the string-only normalizations below so a wrong type surfaces
    # as ValueError instead of AttributeError.
    if not isinstance(symbol, str):
        raise ValueError(f"Invalid symbol type: {type(symbol)}")
    if not isinstance(exchange, str):
        raise ValueError(f"Invalid exchange type: {type(exchange)}")

    return StandardizedTrade(
        symbol=symbol.upper().strip(),
        trade_id=str(trade_id),
        price=decimal_price,
        size=decimal_size,
        side=normalized_side,
        timestamp=dt,
        exchange=exchange.lower(),
        raw_data=raw_data
    )
def batch_create_standardized_trades(raw_trades: List[Dict[str, Any]],
                                     symbol: str,
                                     exchange: str,
                                     field_mapping: Dict[str, str],
                                     is_milliseconds: bool = True) -> List[StandardizedTrade]:
    """
    Build StandardizedTrade objects from a list of raw trade dictionaries.

    Each record is converted independently; a record that fails for any
    reason (missing mapping entry, missing raw field, invalid value) is
    logged as a warning and left out of the result.

    Args:
        raw_trades: List of raw trade dictionaries
        symbol: Trading symbol
        exchange: Exchange name
        field_mapping: Mapping of StandardizedTrade fields to raw data fields
        is_milliseconds: True if timestamps are in milliseconds

    Returns:
        List of successfully created StandardizedTrade objects

    Example field_mapping:
        {
            'trade_id': 'id',
            'price': 'px',
            'size': 'sz',
            'side': 'side',
            'timestamp': 'ts'
        }
    """
    mapped_fields = ('trade_id', 'price', 'size', 'side', 'timestamp')
    created = []

    for record in raw_trades:
        try:
            # Resolved per record so that a bad mapping is reported once per
            # record as a warning rather than aborting the whole batch.
            extracted = {name: record[field_mapping[name]] for name in mapped_fields}
            created.append(
                create_standardized_trade(
                    symbol=symbol,
                    exchange=exchange,
                    raw_data=record,
                    is_milliseconds=is_milliseconds,
                    **extracted
                )
            )
        except Exception as e:
            # Log error but continue processing
            get_logger("batch_transform").warning(f"Failed to transform trade: {e}")

    return created
# Names exported when this module is star-imported.
__all__ = [
'BaseDataTransformer',
'UnifiedDataTransformer',
'create_standardized_trade',
'batch_create_standardized_trades'
]

484
data/common/validation.py Normal file
View File

@@ -0,0 +1,484 @@
"""
Base validation utilities for all exchanges.
This module provides common validation patterns and base classes
that can be extended by exchange-specific validators.
"""
import re
from datetime import datetime, timezone, timedelta
from decimal import Decimal, InvalidOperation
from typing import Dict, List, Optional, Any, Union, Pattern
from abc import ABC, abstractmethod
from .data_types import DataValidationResult, StandardizedTrade, TradeSide
from utils.logger import get_logger
class ValidationResult:
    """Simple validation result for individual field validation."""

    def __init__(self,
                 is_valid: bool,
                 errors: Optional[List[str]] = None,
                 warnings: Optional[List[str]] = None,
                 sanitized_data: Any = None):
        """
        Store the outcome of validating a single value.

        Args:
            is_valid: True if the value passed validation
            errors: Problems that make the value invalid
            warnings: Non-fatal observations about the value
            sanitized_data: Cleaned/converted form of the value, if any
        """
        self.is_valid = is_valid
        # None (or an empty list) becomes a fresh list, so results never
        # share a default container; a non-empty list is stored as given.
        self.errors = errors or []
        self.warnings = warnings or []
        self.sanitized_data = sanitized_data

    def __repr__(self) -> str:
        """Return a constructor-style summary for logs and debugging."""
        return (
            f"{type(self).__name__}(is_valid={self.is_valid!r}, "
            f"errors={self.errors!r}, warnings={self.warnings!r}, "
            f"sanitized_data={self.sanitized_data!r})"
        )
class BaseDataValidator(ABC):
    """
    Abstract base class for exchange data validators.

    This class provides common validation patterns and utilities
    that can be reused across different exchange implementations.
    Subclasses must implement ``validate_symbol_format`` and
    ``validate_websocket_message``; the remaining validators work on plain
    values and report their findings as ``ValidationResult`` objects
    (errors make a value invalid, warnings do not).
    """

    def __init__(self,
                 exchange_name: str,
                 component_name: str = "base_data_validator"):
        """
        Initialize base data validator.

        Args:
            exchange_name: Name of the exchange (e.g., 'okx', 'binance')
            component_name: Name for logging
        """
        self.exchange_name = exchange_name
        self.component_name = component_name
        self.logger = get_logger(self.component_name)

        # Common validation patterns
        self._numeric_pattern = re.compile(r'^-?\d*\.?\d+$')
        self._trade_id_pattern = re.compile(r'^[a-zA-Z0-9_-]+$')  # Flexible pattern

        # Valid trade sides
        self._valid_trade_sides = {'buy', 'sell'}

        # Common price and size limits (can be overridden by subclasses)
        self._min_price = Decimal('0.00000001')  # 1 satoshi equivalent
        self._max_price = Decimal('10000000')  # 10 million
        self._min_size = Decimal('0.00000001')  # Minimum trade size
        self._max_size = Decimal('1000000000')  # 1 billion max size

        # Timestamp validation (milliseconds since epoch)
        self._min_timestamp = 1000000000000  # 2001-09-09 (reasonable minimum)
        self._max_timestamp = 9999999999999  # 2286-11-20 (reasonable maximum)

        self.logger.debug(f"Initialized base data validator for {exchange_name}")

    # Abstract methods that must be implemented by subclasses

    @abstractmethod
    def validate_symbol_format(self, symbol: str) -> ValidationResult:
        """Validate exchange-specific symbol format."""
        pass

    @abstractmethod
    def validate_websocket_message(self, message: Dict[str, Any]) -> DataValidationResult:
        """Validate complete WebSocket message structure."""
        pass

    # Shared conversion helper

    @staticmethod
    def _to_finite_decimal(value: Any) -> Decimal:
        """
        Convert *value* to Decimal via its string form, rejecting NaN/Infinity.

        Raises:
            InvalidOperation: If the string form is not a decimal number
            ValueError: If the number is NaN or infinite
        """
        number = Decimal(str(value))
        if not number.is_finite():
            raise ValueError("value must be a finite number")
        return number

    # Common validation methods available to all subclasses

    def validate_price(self, price: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate price value with common rules.

        Non-numeric, NaN, infinite, zero and negative prices are errors.
        Prices outside the configured min/max bounds, or with more than 12
        decimal places, only produce warnings.

        Args:
            price: Price value to validate

        Returns:
            ValidationResult with sanitized decimal price
        """
        errors = []
        warnings = []
        sanitized_data = None

        try:
            # Convert to Decimal for precise validation
            if isinstance(price, str) and price.strip() == "":
                errors.append("Empty price string")
                return ValidationResult(False, errors, warnings)

            decimal_price = self._to_finite_decimal(price)
            sanitized_data = decimal_price

            # Zero and negative prices are rejected
            if decimal_price <= 0:
                errors.append(f"Price must be positive, got {decimal_price}")

            # Check price bounds
            if decimal_price < self._min_price:
                warnings.append(f"Price {decimal_price} below minimum {self._min_price}")
            elif decimal_price > self._max_price:
                warnings.append(f"Price {decimal_price} above maximum {self._max_price}")

            # Check for excessive decimal places (warn only)
            if decimal_price.as_tuple().exponent < -12:
                warnings.append(f"Price has excessive decimal precision: {decimal_price}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid price value: {price} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_size(self, size: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate size/quantity value with common rules.

        Non-numeric, NaN, infinite, zero and negative sizes are errors.
        Sizes outside the configured min/max bounds only produce warnings.

        Args:
            size: Size value to validate

        Returns:
            ValidationResult with sanitized decimal size
        """
        errors = []
        warnings = []
        sanitized_data = None

        try:
            # Convert to Decimal for precise validation
            if isinstance(size, str) and size.strip() == "":
                errors.append("Empty size string")
                return ValidationResult(False, errors, warnings)

            decimal_size = self._to_finite_decimal(size)
            sanitized_data = decimal_size

            # Check for negative or zero sizes
            if decimal_size <= 0:
                errors.append(f"Size must be positive, got {decimal_size}")

            # Check size bounds
            if decimal_size < self._min_size:
                warnings.append(f"Size {decimal_size} below minimum {self._min_size}")
            elif decimal_size > self._max_size:
                warnings.append(f"Size {decimal_size} above maximum {self._max_size}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid size value: {size} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_volume(self, volume: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate volume value with common rules.

        Non-numeric, NaN, infinite and negative volumes are errors; zero is
        accepted.

        Args:
            volume: Volume value to validate

        Returns:
            ValidationResult with sanitized decimal volume
        """
        errors = []
        warnings = []
        sanitized_data = None

        try:
            decimal_volume = self._to_finite_decimal(volume)
            sanitized_data = decimal_volume

            # Volume can be zero (no trades in period)
            if decimal_volume < 0:
                errors.append(f"Volume cannot be negative, got {decimal_volume}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid volume value: {volume} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_trade_side(self, side: str) -> ValidationResult:
        """
        Validate trade side with common rules.

        The comparison is case-insensitive; surrounding whitespace is not
        ignored.

        Args:
            side: Trade side string

        Returns:
            ValidationResult carrying the lower-cased side when valid
        """
        errors = []
        warnings = []

        if not isinstance(side, str):
            errors.append(f"Trade side must be string, got {type(side)}")
            return ValidationResult(False, errors, warnings)

        normalized_side = side.lower()
        if normalized_side not in self._valid_trade_sides:
            errors.append(f"Invalid trade side: {side}. Must be 'buy' or 'sell'")
            return ValidationResult(False, errors, warnings)

        return ValidationResult(True, errors, warnings, normalized_side)

    def validate_timestamp(self, timestamp: Union[str, int], is_milliseconds: bool = True) -> ValidationResult:
        """
        Validate timestamp value with common rules.

        Values outside the configured absolute bounds are errors; values
        more than one year away from the current UTC time only produce
        warnings.

        Args:
            timestamp: Timestamp value to validate (int or digit-only string)
            is_milliseconds: True if timestamp is in milliseconds, False for seconds

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        try:
            # Convert to int
            if isinstance(timestamp, str):
                if not timestamp.isdigit():
                    errors.append(f"Invalid timestamp format: {timestamp}")
                    return ValidationResult(False, errors, warnings)
                timestamp_int = int(timestamp)
            elif isinstance(timestamp, int):
                timestamp_int = timestamp
            else:
                errors.append(f"Timestamp must be string or int, got {type(timestamp)}")
                return ValidationResult(False, errors, warnings)

            # Convert to milliseconds if needed
            if not is_milliseconds:
                timestamp_int = timestamp_int * 1000

            # Check timestamp bounds
            if timestamp_int < self._min_timestamp:
                errors.append(f"Timestamp {timestamp_int} too old")
            elif timestamp_int > self._max_timestamp:
                errors.append(f"Timestamp {timestamp_int} too far in future")

            # Check if timestamp is reasonable (within last year to next year)
            current_time_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
            one_year_ms = 365 * 24 * 60 * 60 * 1000

            if timestamp_int < (current_time_ms - one_year_ms):
                warnings.append(f"Timestamp {timestamp_int} is older than 1 year")
            elif timestamp_int > (current_time_ms + one_year_ms):
                warnings.append(f"Timestamp {timestamp_int} is more than 1 year in future")

        except (ValueError, TypeError) as e:
            errors.append(f"Invalid timestamp: {timestamp} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_trade_id(self, trade_id: Union[str, int]) -> ValidationResult:
        """
        Validate trade ID with flexible rules.

        Only a wrong type or a blank ID is an error; an ID containing
        characters other than letters, digits, underscore or hyphen is
        accepted with a warning.

        Args:
            trade_id: Trade ID to validate

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        if isinstance(trade_id, int):
            trade_id = str(trade_id)

        if not isinstance(trade_id, str):
            errors.append(f"Trade ID must be string or int, got {type(trade_id)}")
            return ValidationResult(False, errors, warnings)

        if not trade_id.strip():
            errors.append("Trade ID cannot be empty")
            return ValidationResult(False, errors, warnings)

        # Flexible validation - allow alphanumeric, underscore, hyphen.
        # fullmatch is required: with match(), the pattern's `$` also
        # accepts an ID that ends in a newline.
        if not self._trade_id_pattern.fullmatch(trade_id):
            warnings.append(f"Trade ID has unusual format: {trade_id}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_symbol_match(self, symbol: str, expected_symbol: Optional[str] = None) -> ValidationResult:
        """
        Validate symbol matches expected value.

        A mismatch is reported as a warning, not an error; only a
        non-string symbol makes the result invalid.

        Args:
            symbol: Symbol to validate
            expected_symbol: Expected symbol value (skipped when falsy)

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        if not isinstance(symbol, str):
            errors.append(f"Symbol must be string, got {type(symbol)}")
            return ValidationResult(False, errors, warnings)

        if expected_symbol and symbol != expected_symbol:
            warnings.append(f"Symbol mismatch: expected {expected_symbol}, got {symbol}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_orderbook_side(self, side_data: List[List[str]], side_name: str) -> ValidationResult:
        """
        Validate orderbook side (asks or bids) with common rules.

        Each level must be a list whose first two elements are a valid
        price and size. Levels that pass are returned with price and size
        as strings, followed by any extra elements unchanged; levels that
        fail are reported as errors and omitted from the sanitized data.

        Args:
            side_data: List of price/size pairs
            side_name: Name of side for error messages

        Returns:
            ValidationResult with sanitized data
        """
        errors = []
        warnings = []
        sanitized_data = []

        if not isinstance(side_data, list):
            errors.append(f"{side_name} must be a list")
            return ValidationResult(False, errors, warnings)

        for i, level in enumerate(side_data):
            if not isinstance(level, list) or len(level) < 2:
                errors.append(f"{side_name}[{i}] must be a list with at least 2 elements")
                continue

            # Validate price and size
            price_result = self.validate_price(level[0])
            size_result = self.validate_size(level[1])

            if not price_result.is_valid:
                errors.extend([f"{side_name}[{i}] price: {error}" for error in price_result.errors])
            if not size_result.is_valid:
                errors.extend([f"{side_name}[{i}] size: {error}" for error in size_result.errors])

            # Surface per-level warnings (bounds, precision) to the caller
            warnings.extend([f"{side_name}[{i}] price: {warning}" for warning in price_result.warnings])
            warnings.extend([f"{side_name}[{i}] size: {warning}" for warning in size_result.warnings])

            # Add sanitized level
            if price_result.is_valid and size_result.is_valid:
                sanitized_level = [str(price_result.sanitized_data), str(size_result.sanitized_data)]
                # Include additional fields if present
                if len(level) > 2:
                    sanitized_level.extend(level[2:])
                sanitized_data.append(sanitized_level)

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_standardized_trade(self, trade: StandardizedTrade) -> DataValidationResult:
        """
        Validate a standardized trade object.

        Runs the price, size, side, trade ID, symbol-format and timestamp
        validators and merges their findings, prefixing each message with
        the field name. Any exception raised along the way is converted
        into an error entry instead of propagating.

        Args:
            trade: StandardizedTrade object to validate

        Returns:
            DataValidationResult
        """
        errors = []
        warnings = []

        try:
            # Validate price
            price_result = self.validate_price(trade.price)
            if not price_result.is_valid:
                errors.extend([f"price: {error}" for error in price_result.errors])
            warnings.extend([f"price: {warning}" for warning in price_result.warnings])

            # Validate size
            size_result = self.validate_size(trade.size)
            if not size_result.is_valid:
                errors.extend([f"size: {error}" for error in size_result.errors])
            warnings.extend([f"size: {warning}" for warning in size_result.warnings])

            # Validate side
            side_result = self.validate_trade_side(trade.side)
            if not side_result.is_valid:
                errors.extend([f"side: {error}" for error in side_result.errors])

            # Validate trade ID
            trade_id_result = self.validate_trade_id(trade.trade_id)
            if not trade_id_result.is_valid:
                errors.extend([f"trade_id: {error}" for error in trade_id_result.errors])
            warnings.extend([f"trade_id: {warning}" for warning in trade_id_result.warnings])

            # Validate symbol format (exchange-specific)
            symbol_result = self.validate_symbol_format(trade.symbol)
            if not symbol_result.is_valid:
                errors.extend([f"symbol: {error}" for error in symbol_result.errors])
            warnings.extend([f"symbol: {warning}" for warning in symbol_result.warnings])

            # Validate timestamp (converted to epoch milliseconds first)
            timestamp_ms = int(trade.timestamp.timestamp() * 1000)
            timestamp_result = self.validate_timestamp(timestamp_ms, is_milliseconds=True)
            if not timestamp_result.is_valid:
                errors.extend([f"timestamp: {error}" for error in timestamp_result.errors])
            warnings.extend([f"timestamp: {warning}" for warning in timestamp_result.warnings])

            return DataValidationResult(len(errors) == 0, errors, warnings)

        except Exception as e:
            errors.append(f"Exception during trade validation: {str(e)}")
            return DataValidationResult(False, errors, warnings)

    def get_validator_info(self) -> Dict[str, Any]:
        """Get validator configuration information (limits and regex patterns)."""
        return {
            'exchange': self.exchange_name,
            'component': self.component_name,
            'limits': {
                'min_price': str(self._min_price),
                'max_price': str(self._max_price),
                'min_size': str(self._min_size),
                'max_size': str(self._max_size),
                'min_timestamp': self._min_timestamp,
                'max_timestamp': self._max_timestamp
            },
            'patterns': {
                'numeric': self._numeric_pattern.pattern,
                'trade_id': self._trade_id_pattern.pattern
            }
        }
# Utility functions for common validation patterns
def is_valid_decimal(value: Any) -> bool:
    """
    Report whether ``Decimal(str(value))`` can be constructed.

    Note that this is a parse check only: special values that Decimal
    accepts, such as "NaN" or "Infinity", also count as convertible.
    """
    try:
        Decimal(str(value))
    except (InvalidOperation, ValueError, TypeError):
        return False
    else:
        return True
def normalize_symbol(symbol: str, exchange: str) -> str:
    """
    Normalize symbol format for exchange.

    Currently exchange-agnostic: the symbol is upper-cased and surrounding
    whitespace is removed. ``exchange`` is accepted but not consulted.

    Args:
        symbol: Raw symbol string
        exchange: Exchange name

    Returns:
        Normalized symbol string
    """
    # Basic normalization - can be extended per exchange
    uppercased = symbol.upper()
    return uppercased.strip()
def validate_required_fields(data: Dict[str, Any], required_fields: List[str]) -> List[str]:
    """
    Report which required fields are absent from data.

    A field counts as missing when its key is not present or when its
    value is None; other falsy values (0, "", []) count as present.

    Args:
        data: Data dictionary to check
        required_fields: List of required field names

    Returns:
        List of missing field names, in the order given by required_fields
    """
    return [
        name
        for name in required_fields
        if name not in data or data[name] is None
    ]
# Names exported when this module is star-imported.
__all__ = [
'ValidationResult',
'BaseDataValidator',
'is_valid_decimal',
'normalize_symbol',
'validate_required_fields'
]