TCPDashboard/data/common/transformation.py
Vasily.onl 8bb5f28fd2 Add common data processing framework for OKX exchange
- Introduced a modular architecture for data processing, including common utilities for validation, transformation, and aggregation.
- Implemented `StandardizedTrade`, `OHLCVCandle`, and `TimeframeBucket` classes for unified data handling across exchanges.
- Developed `OKXDataProcessor` for OKX-specific data validation and processing, leveraging the new common framework.
- Enhanced `OKXCollector` to utilize the common data processing utilities, improving modularity and maintainability.
- Updated documentation to reflect the new architecture and provide guidance on the data processing framework.
- Created comprehensive tests for the new data processing components to ensure reliability and functionality.
2025-05-31 21:58:47 +08:00

471 lines
16 KiB
Python

"""
Base transformation utilities for all exchanges.
This module provides common transformation patterns and base classes
for converting exchange-specific data to standardized formats.
"""
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, List, Optional, Any, Iterator
from abc import ABC, abstractmethod
from .data_types import StandardizedTrade, OHLCVCandle, DataValidationResult
from .aggregation import BatchCandleProcessor
from utils.logger import get_logger
class BaseDataTransformer(ABC):
"""
Abstract base class for exchange data transformers.
This class provides common transformation patterns that can be
extended by exchange-specific implementations.
"""
def __init__(self,
exchange_name: str,
component_name: str = "base_data_transformer"):
"""
Initialize base data transformer.
Args:
exchange_name: Name of the exchange (e.g., 'okx', 'binance')
component_name: Name for logging
"""
self.exchange_name = exchange_name
self.component_name = component_name
self.logger = get_logger(self.component_name)
self.logger.info(f"Initialized base data transformer for {exchange_name}")
# Abstract methods that must be implemented by subclasses
@abstractmethod
def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[StandardizedTrade]:
"""Transform exchange-specific trade data to standardized format."""
pass
@abstractmethod
def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
"""Transform exchange-specific orderbook data to standardized format."""
pass
@abstractmethod
def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
"""Transform exchange-specific ticker data to standardized format."""
pass
# Common transformation utilities available to all subclasses
def timestamp_to_datetime(self, timestamp: Any, is_milliseconds: bool = True) -> datetime:
"""
Convert various timestamp formats to timezone-aware datetime.
Args:
timestamp: Timestamp in various formats
is_milliseconds: True if timestamp is in milliseconds
Returns:
Timezone-aware datetime object
"""
try:
# Convert to int/float
if isinstance(timestamp, str):
timestamp_num = float(timestamp)
elif isinstance(timestamp, (int, float)):
timestamp_num = float(timestamp)
else:
raise ValueError(f"Invalid timestamp type: {type(timestamp)}")
# Convert to seconds if needed
if is_milliseconds:
timestamp_num = timestamp_num / 1000
# Create timezone-aware datetime
dt = datetime.fromtimestamp(timestamp_num, tz=timezone.utc)
return dt
except Exception as e:
self.logger.error(f"Error converting timestamp {timestamp}: {e}")
# Return current time as fallback
return datetime.now(timezone.utc)
def safe_decimal_conversion(self, value: Any, field_name: str = "value") -> Optional[Decimal]:
"""
Safely convert value to Decimal with error handling.
Args:
value: Value to convert
field_name: Name of field for error logging
Returns:
Decimal value or None if conversion failed
"""
try:
if value is None or value == "":
return None
return Decimal(str(value))
except Exception as e:
self.logger.warning(f"Failed to convert {field_name} '{value}' to Decimal: {e}")
return None
def normalize_trade_side(self, side: str) -> str:
"""
Normalize trade side to standard format.
Args:
side: Raw trade side string
Returns:
Normalized side ('buy' or 'sell')
"""
normalized = side.lower().strip()
# Handle common variations
if normalized in ['buy', 'bid', 'b', '1']:
return 'buy'
elif normalized in ['sell', 'ask', 's', '0']:
return 'sell'
else:
self.logger.warning(f"Unknown trade side: {side}, defaulting to 'buy'")
return 'buy'
def validate_symbol_format(self, symbol: str) -> str:
"""
Validate and normalize symbol format.
Args:
symbol: Raw symbol string
Returns:
Normalized symbol string
"""
if not symbol or not isinstance(symbol, str):
raise ValueError(f"Invalid symbol: {symbol}")
# Basic normalization
normalized = symbol.upper().strip()
if not normalized:
raise ValueError("Empty symbol after normalization")
return normalized
def transform_database_record(self, record: Any) -> Optional[StandardizedTrade]:
"""
Transform database record to standardized format.
This method should be overridden by subclasses to handle
their specific database schema.
Args:
record: Database record
Returns:
StandardizedTrade or None if transformation failed
"""
self.logger.warning("transform_database_record not implemented for this exchange")
return None
def get_transformer_info(self) -> Dict[str, Any]:
"""Get transformer information."""
return {
'exchange': self.exchange_name,
'component': self.component_name,
'capabilities': {
'trade_transformation': True,
'orderbook_transformation': True,
'ticker_transformation': True,
'database_transformation': hasattr(self, 'transform_database_record')
}
}
class UnifiedDataTransformer:
"""
Unified data transformation system for all scenarios.
This class provides a common interface for transforming data from
various sources (real-time, historical, backfill) into standardized
formats for further processing.
TRANSFORMATION PROCESS:
1. Raw Data Input (exchange format, database records, etc.)
2. Validation (using exchange-specific validators)
3. Transformation to StandardizedTrade format
4. Optional aggregation to candles
5. Output in consistent format
"""
def __init__(self,
exchange_transformer: BaseDataTransformer,
component_name: str = "unified_data_transformer"):
"""
Initialize unified data transformer.
Args:
exchange_transformer: Exchange-specific transformer instance
component_name: Name for logging
"""
self.exchange_transformer = exchange_transformer
self.component_name = component_name
self.logger = get_logger(self.component_name)
self.logger.info(f"Initialized unified data transformer with {exchange_transformer.exchange_name} transformer")
def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[StandardizedTrade]:
"""
Transform trade data using exchange-specific transformer.
Args:
raw_data: Raw trade data from exchange
symbol: Trading symbol
Returns:
Standardized trade or None if transformation failed
"""
try:
return self.exchange_transformer.transform_trade_data(raw_data, symbol)
except Exception as e:
self.logger.error(f"Error in trade transformation: {e}")
return None
def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
"""
Transform orderbook data using exchange-specific transformer.
Args:
raw_data: Raw orderbook data from exchange
symbol: Trading symbol
Returns:
Standardized orderbook data or None if transformation failed
"""
try:
return self.exchange_transformer.transform_orderbook_data(raw_data, symbol)
except Exception as e:
self.logger.error(f"Error in orderbook transformation: {e}")
return None
def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
"""
Transform ticker data using exchange-specific transformer.
Args:
raw_data: Raw ticker data from exchange
symbol: Trading symbol
Returns:
Standardized ticker data or None if transformation failed
"""
try:
return self.exchange_transformer.transform_ticker_data(raw_data, symbol)
except Exception as e:
self.logger.error(f"Error in ticker transformation: {e}")
return None
def process_trades_to_candles(self,
trades: Iterator[StandardizedTrade],
timeframes: List[str],
symbol: str) -> List[OHLCVCandle]:
"""
Process any trade iterator to candles - unified processing function.
This function handles ALL scenarios:
- Real-time: Single trade iterators
- Historical: Batch trade iterators
- Backfill: API trade iterators
Args:
trades: Iterator of standardized trades
timeframes: List of timeframes to generate
symbol: Trading symbol
Returns:
List of completed candles
"""
try:
processor = BatchCandleProcessor(
symbol,
self.exchange_transformer.exchange_name,
timeframes,
f"unified_batch_processor_{symbol}"
)
candles = processor.process_trades_to_candles(trades)
self.logger.info(f"Processed {processor.get_stats()['trades_processed']} trades to {len(candles)} candles")
return candles
except Exception as e:
self.logger.error(f"Error processing trades to candles: {e}")
return []
def batch_transform_trades(self,
raw_trades: List[Dict[str, Any]],
symbol: str) -> List[StandardizedTrade]:
"""
Transform multiple trade records in batch.
Args:
raw_trades: List of raw trade data
symbol: Trading symbol
Returns:
List of successfully transformed trades
"""
transformed_trades = []
errors = 0
for raw_trade in raw_trades:
try:
trade = self.transform_trade_data(raw_trade, symbol)
if trade:
transformed_trades.append(trade)
else:
errors += 1
except Exception as e:
self.logger.error(f"Error transforming trade: {e}")
errors += 1
self.logger.info(f"Batch transformed {len(transformed_trades)} trades successfully, {errors} errors")
return transformed_trades
def get_transformer_info(self) -> Dict[str, Any]:
"""Get comprehensive transformer information."""
base_info = self.exchange_transformer.get_transformer_info()
base_info.update({
'unified_component': self.component_name,
'batch_processing': True,
'candle_aggregation': True
})
return base_info
# Utility functions for common transformation patterns
def create_standardized_trade(symbol: str,
trade_id: str,
price: Any,
size: Any,
side: str,
timestamp: Any,
exchange: str,
raw_data: Optional[Dict[str, Any]] = None,
is_milliseconds: bool = True) -> StandardizedTrade:
"""
Utility function to create StandardizedTrade with proper validation.
Args:
symbol: Trading symbol
trade_id: Trade identifier
price: Trade price (any numeric type)
size: Trade size (any numeric type)
side: Trade side ('buy' or 'sell')
timestamp: Trade timestamp
exchange: Exchange name
raw_data: Original raw data
is_milliseconds: True if timestamp is in milliseconds
Returns:
StandardizedTrade object
Raises:
ValueError: If data is invalid
"""
# Convert timestamp
if isinstance(timestamp, (int, float, str)):
timestamp_num = float(timestamp)
if is_milliseconds:
timestamp_num = timestamp_num / 1000
dt = datetime.fromtimestamp(timestamp_num, tz=timezone.utc)
elif isinstance(timestamp, datetime):
dt = timestamp
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
else:
raise ValueError(f"Invalid timestamp type: {type(timestamp)}")
# Convert price and size to Decimal
try:
decimal_price = Decimal(str(price))
decimal_size = Decimal(str(size))
except Exception as e:
raise ValueError(f"Invalid price or size: {e}")
# Normalize side
normalized_side = side.lower().strip()
if normalized_side not in ['buy', 'sell']:
raise ValueError(f"Invalid trade side: {side}")
return StandardizedTrade(
symbol=symbol.upper().strip(),
trade_id=str(trade_id),
price=decimal_price,
size=decimal_size,
side=normalized_side,
timestamp=dt,
exchange=exchange.lower(),
raw_data=raw_data
)
def batch_create_standardized_trades(raw_trades: List[Dict[str, Any]],
symbol: str,
exchange: str,
field_mapping: Dict[str, str],
is_milliseconds: bool = True) -> List[StandardizedTrade]:
"""
Batch create standardized trades from raw data.
Args:
raw_trades: List of raw trade dictionaries
symbol: Trading symbol
exchange: Exchange name
field_mapping: Mapping of StandardizedTrade fields to raw data fields
is_milliseconds: True if timestamps are in milliseconds
Returns:
List of successfully created StandardizedTrade objects
Example field_mapping:
{
'trade_id': 'id',
'price': 'px',
'size': 'sz',
'side': 'side',
'timestamp': 'ts'
}
"""
trades = []
for raw_trade in raw_trades:
try:
trade = create_standardized_trade(
symbol=symbol,
trade_id=raw_trade[field_mapping['trade_id']],
price=raw_trade[field_mapping['price']],
size=raw_trade[field_mapping['size']],
side=raw_trade[field_mapping['side']],
timestamp=raw_trade[field_mapping['timestamp']],
exchange=exchange,
raw_data=raw_trade,
is_milliseconds=is_milliseconds
)
trades.append(trade)
except Exception as e:
# Log error but continue processing
logger = get_logger("batch_transform")
logger.warning(f"Failed to transform trade: {e}")
return trades
__all__ = [
'BaseDataTransformer',
'UnifiedDataTransformer',
'create_standardized_trade',
'batch_create_standardized_trades'
]