Add common data processing framework for OKX exchange

- Introduced a modular architecture for data processing, including common utilities for validation, transformation, and aggregation.
- Implemented `StandardizedTrade`, `OHLCVCandle`, and `TimeframeBucket` classes for unified data handling across exchanges.
- Developed `OKXDataProcessor` for OKX-specific data validation and processing, leveraging the new common framework.
- Enhanced `OKXCollector` to utilize the common data processing utilities, improving modularity and maintainability.
- Updated documentation to reflect the new architecture and provide guidance on the data processing framework.
- Created comprehensive tests for the new data processing components to ensure reliability and functionality.
This commit is contained in:
Vasily.onl
2025-05-31 21:58:47 +08:00
parent fa63e7eb2e
commit 8bb5f28fd2
15 changed files with 4015 additions and 214 deletions

View File

@@ -8,18 +8,19 @@ error handling, health monitoring, and database integration.
import asyncio
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, List, Optional, Any, Set
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from ...base_collector import (
BaseDataCollector, DataType, CollectorStatus, MarketDataPoint,
OHLCVData, DataValidationError, ConnectionError
)
from ...common import StandardizedTrade, OHLCVCandle
from .websocket import (
OKXWebSocketClient, OKXSubscription, OKXChannelType,
ConnectionState, OKXWebSocketError
)
from .data_processor import OKXDataProcessor
from database.connection import get_db_manager, get_raw_data_manager
from database.models import MarketData, RawTrade
from utils.logger import get_logger
@@ -41,6 +42,8 @@ class OKXCollector(BaseDataCollector):
This collector handles a single trading pair and collects real-time data
including trades, orderbook, and ticker information from OKX exchange.
Uses the new common data processing framework for validation, transformation,
and aggregation.
"""
def __init__(self,
@@ -86,14 +89,22 @@ class OKXCollector(BaseDataCollector):
# WebSocket client
self._ws_client: Optional[OKXWebSocketClient] = None
# Data processor using new common framework
self._data_processor = OKXDataProcessor(symbol, component_name=f"{component_name}_processor")
# Add callbacks for processed data
self._data_processor.add_trade_callback(self._on_trade_processed)
self._data_processor.add_candle_callback(self._on_candle_processed)
# Database managers
self._db_manager = None
self._raw_data_manager = None
# Data processing
self._message_buffer: List[Dict[str, Any]] = []
self._last_trade_id: Optional[str] = None
self._last_orderbook_ts: Optional[int] = None
# Data processing counters
self._message_count = 0
self._processed_trades = 0
self._processed_candles = 0
self._error_count = 0
# OKX channel mapping
self._channel_mapping = {
@@ -103,6 +114,7 @@ class OKXCollector(BaseDataCollector):
}
self.logger.info(f"Initialized OKX collector for {symbol} with data types: {[dt.value for dt in data_types]}")
self.logger.info(f"Using common data processing framework")
async def connect(self) -> bool:
"""
@@ -200,14 +212,13 @@ class OKXCollector(BaseDataCollector):
# Subscribe to channels
success = await self._ws_client.subscribe(subscriptions)
if success:
self.logger.info(f"Successfully subscribed to {len(subscriptions)} channels for {self.symbol}")
return True
else:
self.logger.error(f"Failed to subscribe to channels for {self.symbol}")
return success
return False
except Exception as e:
self.logger.error(f"Error subscribing to data for {self.symbol}: {e}")
return False
@@ -224,11 +235,11 @@ class OKXCollector(BaseDataCollector):
True if unsubscription successful, False otherwise
"""
if not self._ws_client or not self._ws_client.is_connected:
self.logger.warning("WebSocket client not connected for unsubscription")
return True # Consider it successful if already disconnected
self.logger.warning("WebSocket client not connected")
return True # Consider it successful if not connected
try:
# Build unsubscriptions
# Build unsubscription list
subscriptions = []
for data_type in data_types:
if data_type in self._channel_mapping:
@@ -236,7 +247,7 @@ class OKXCollector(BaseDataCollector):
subscription = OKXSubscription(
channel=channel,
inst_id=self.symbol,
enabled=False
enabled=False # False for unsubscribe
)
subscriptions.append(subscription)
@@ -245,241 +256,223 @@ class OKXCollector(BaseDataCollector):
# Unsubscribe from channels
success = await self._ws_client.unsubscribe(subscriptions)
if success:
self.logger.info(f"Successfully unsubscribed from {len(subscriptions)} channels for {self.symbol}")
return True
else:
self.logger.warning(f"Failed to unsubscribe from channels for {self.symbol}")
return success
self.logger.error(f"Failed to unsubscribe from channels for {self.symbol}")
return False
except Exception as e:
self.logger.error(f"Error unsubscribing from data for {self.symbol}: {e}")
return False
async def _process_message(self, message: Any) -> Optional[MarketDataPoint]:
"""
Process incoming message from OKX WebSocket.
Process received message using the new data processor.
Args:
message: Raw message from WebSocket
Returns:
Processed MarketDataPoint or None if processing failed
MarketDataPoint if processing successful, None otherwise
"""
if not isinstance(message, dict):
self.logger.warning(f"Received non-dict message: {type(message)}")
return None
try:
if not isinstance(message, dict):
self.logger.warning(f"Unexpected message type: {type(message)}")
self._message_count += 1
# Use the new data processor for validation and processing
success, market_data_points, errors = self._data_processor.validate_and_process_message(
message, expected_symbol=self.symbol
)
if not success:
self._error_count += 1
self.logger.error(f"Message processing failed: {errors}")
return None
# Extract channel and data
arg = message.get('arg', {})
channel = arg.get('channel')
inst_id = arg.get('instId')
data_list = message.get('data', [])
if errors:
self.logger.warning(f"Message processing warnings: {errors}")
# Validate message structure
if not channel or not inst_id or not data_list:
self.logger.debug(f"Incomplete message structure: {message}")
return None
# Store raw data if enabled (for debugging/compliance)
if self.store_raw_data and 'data' in message and 'arg' in message:
await self._store_raw_data(message['arg'].get('channel', 'unknown'), message)
# Check if this message is for our symbol
if inst_id != self.symbol:
self.logger.debug(f"Message for different symbol: {inst_id} (expected: {self.symbol})")
return None
# Store processed market data points in raw_trades table
for data_point in market_data_points:
await self._store_processed_data(data_point)
# Process each data item
market_data_points = []
for data_item in data_list:
data_point = await self._process_data_item(channel, data_item)
if data_point:
market_data_points.append(data_point)
# Store raw data if enabled
if self.store_raw_data and self._raw_data_manager:
await self._store_raw_data(channel, message)
# Return the first processed data point (for the base class interface)
# Return the first data point for compatibility (most use cases have single data point per message)
return market_data_points[0] if market_data_points else None
except Exception as e:
self.logger.error(f"Error processing message for {self.symbol}: {e}")
self._error_count += 1
self.logger.error(f"Error processing message: {e}")
return None
async def _handle_messages(self) -> None:
"""
Handle incoming messages from WebSocket.
This is called by the base class message loop.
"""
# The actual message handling is done through the WebSocket client callback
# This method satisfies the abstract method requirement
if self._ws_client and self._ws_client.is_connected:
# Just sleep briefly to yield control
await asyncio.sleep(0.1)
else:
# If not connected, sleep longer to avoid busy loop
await asyncio.sleep(1.0)
async def _process_data_item(self, channel: str, data_item: Dict[str, Any]) -> Optional[MarketDataPoint]:
"""
Process individual data item from OKX message.
Args:
channel: OKX channel name
data_item: Individual data item
Returns:
Processed MarketDataPoint or None
"""
try:
# Determine data type from channel
data_type = None
for dt, ch in self._channel_mapping.items():
if ch == channel:
data_type = dt
break
if not data_type:
self.logger.warning(f"Unknown channel: {channel}")
return None
# Extract timestamp
timestamp_ms = data_item.get('ts')
if timestamp_ms:
timestamp = datetime.fromtimestamp(int(timestamp_ms) / 1000, tz=timezone.utc)
else:
timestamp = datetime.now(timezone.utc)
# Create MarketDataPoint
market_data_point = MarketDataPoint(
exchange="okx",
symbol=self.symbol,
timestamp=timestamp,
data_type=data_type,
data=data_item
)
# Store processed data to database
await self._store_processed_data(market_data_point)
# Update statistics
self._stats['messages_processed'] += 1
self._stats['last_message_time'] = timestamp
return market_data_point
except Exception as e:
self.logger.error(f"Error processing data item for {self.symbol}: {e}")
self._stats['errors'] += 1
return None
"""Handle message processing in the background."""
# The new data processor handles messages through callbacks
# This method exists for compatibility with BaseDataCollector
await asyncio.sleep(0.1)
async def _store_processed_data(self, data_point: MarketDataPoint) -> None:
"""
Store processed data to MarketData table.
Store raw market data in the raw_trades table.
Args:
data_point: Processed market data point
"""
try:
# For now, we'll focus on trade data storage
# Orderbook and ticker storage can be added later
if data_point.data_type == DataType.TRADE:
await self._store_trade_data(data_point)
except Exception as e:
self.logger.error(f"Error storing processed data for {self.symbol}: {e}")
async def _store_trade_data(self, data_point: MarketDataPoint) -> None:
"""
Store trade data to database.
Args:
data_point: Trade data point
data_point: Raw market data point (trade, orderbook, ticker)
"""
try:
if not self._db_manager:
return
trade_data = data_point.data
# Extract trade information
trade_id = trade_data.get('tradeId')
price = Decimal(str(trade_data.get('px', '0')))
size = Decimal(str(trade_data.get('sz', '0')))
side = trade_data.get('side', 'unknown')
# Skip duplicate trades
if trade_id == self._last_trade_id:
return
self._last_trade_id = trade_id
# For now, we'll log the trade data
# Actual database storage will be implemented in the next phase
self.logger.debug(f"Trade: {self.symbol} - {side} {size} @ {price} (ID: {trade_id})")
# Store raw market data points in raw_trades table
with self._db_manager.get_session() as session:
raw_trade = RawTrade(
exchange="okx",
symbol=data_point.symbol,
timestamp=data_point.timestamp,
data_type=data_point.data_type.value,
raw_data=data_point.data
)
session.add(raw_trade)
self.logger.debug(f"Stored raw data: {data_point.data_type.value} for {data_point.symbol}")
except Exception as e:
self.logger.error(f"Error storing trade data for {self.symbol}: {e}")
self.logger.error(f"Error storing raw market data: {e}")
async def _store_completed_candle(self, candle: OHLCVCandle) -> None:
    """
    Store completed OHLCV candle in the market_data table.

    Called from the candle-completion callback; failures are logged and
    swallowed so a database hiccup never stops live collection.

    Args:
        candle: Completed OHLCV candle
    """
    try:
        # No database manager configured (e.g. during tests): persistence is a no-op.
        if not self._db_manager:
            return
        # Store completed candles in market_data table
        with self._db_manager.get_session() as session:
            market_data = MarketData(
                exchange=candle.exchange,
                symbol=candle.symbol,
                timeframe=candle.timeframe,
                timestamp=candle.start_time,  # Use start_time as the candle timestamp
                open=candle.open,
                high=candle.high,
                low=candle.low,
                close=candle.close,
                volume=candle.volume,
                trades_count=candle.trade_count
            )
            session.add(market_data)
            # NOTE(review): commit/rollback is presumably handled by the
            # get_session() context manager — confirm its semantics.
            self.logger.info(f"Stored completed candle: {candle.symbol} {candle.timeframe} at {candle.start_time}")
    except Exception as e:
        # Log-and-continue: candle storage is best-effort.
        self.logger.error(f"Error storing completed candle: {e}")
async def _store_raw_data(self, channel: str, raw_message: Dict[str, Any]) -> None:
"""
Store raw data for debugging and compliance.
Store raw WebSocket data for debugging in raw_trades table.
Args:
channel: OKX channel name
raw_message: Complete raw message
channel: Channel name
raw_message: Raw WebSocket message
"""
try:
if not self._raw_data_manager:
if not self._raw_data_manager or 'data' not in raw_message:
return
# Store raw data using the raw data manager
self._raw_data_manager.store_raw_data(
exchange="okx",
symbol=self.symbol,
data_type=channel,
raw_data=raw_message,
timestamp=datetime.now(timezone.utc)
)
# Store each data item as a separate raw data record
for data_item in raw_message['data']:
self._raw_data_manager.store_raw_data(
exchange="okx",
symbol=self.symbol,
data_type=f"raw_{channel}", # Prefix with 'raw_' to distinguish from processed data
raw_data=data_item,
timestamp=datetime.now(timezone.utc)
)
except Exception as e:
self.logger.error(f"Error storing raw data for {self.symbol}: {e}")
self.logger.error(f"Error storing raw WebSocket data: {e}")
def _on_message(self, message: Dict[str, Any]) -> None:
"""
Callback function for WebSocket messages.
Handle incoming WebSocket message.
Args:
message: Message received from WebSocket
message: WebSocket message from OKX
"""
try:
# Add message to buffer for processing
self._message_buffer.append(message)
# Process message asynchronously
asyncio.create_task(self._process_message(message))
except Exception as e:
self.logger.error(f"Error in message callback for {self.symbol}: {e}")
self.logger.error(f"Error handling WebSocket message: {e}")
def _on_trade_processed(self, trade: StandardizedTrade) -> None:
    """
    Callback invoked by the data processor for each validated trade.

    Args:
        trade: Processed standardized trade
    """
    # Counter feeds the processing_stats section reported by get_status().
    self._processed_trades = self._processed_trades + 1
    summary = f"Processed trade: {trade.symbol} {trade.side} {trade.size}@{trade.price}"
    self.logger.debug(summary)
def _on_candle_processed(self, candle: OHLCVCandle) -> None:
    """
    Callback invoked by the data processor when a candle completes.

    Args:
        candle: Completed OHLCV candle
    """
    self._processed_candles = self._processed_candles + 1
    ohlcv = (
        f"Completed candle: {candle.symbol} {candle.timeframe} "
        f"O:{candle.open} H:{candle.high} L:{candle.low} C:{candle.close} V:{candle.volume}"
    )
    self.logger.info(ohlcv)
    # Store completed candle in market_data table; persistence runs as a
    # fire-and-forget task so the callback never blocks the processor.
    if candle.is_complete:
        asyncio.create_task(self._store_completed_candle(candle))
def get_status(self) -> Dict[str, Any]:
"""Get collector status including WebSocket client status."""
"""
Get current collector status including processing statistics.
Returns:
Dictionary containing collector status information
"""
base_status = super().get_status()
# Add OKX-specific status
okx_status = {
'symbol': self.symbol,
'websocket_connected': self._ws_client.is_connected if self._ws_client else False,
'websocket_state': self._ws_client.connection_state.value if self._ws_client else 'disconnected',
'last_trade_id': self._last_trade_id,
'message_buffer_size': len(self._message_buffer),
'store_raw_data': self.store_raw_data
"symbol": self.symbol,
"websocket_connected": self._ws_client.is_connected if self._ws_client else False,
"websocket_state": self._ws_client.connection_state.value if self._ws_client else "disconnected",
"store_raw_data": self.store_raw_data,
"processing_stats": {
"messages_received": self._message_count,
"trades_processed": self._processed_trades,
"candles_processed": self._processed_candles,
"errors": self._error_count
}
}
# Add WebSocket stats if available
if self._ws_client:
okx_status['websocket_stats'] = self._ws_client.get_stats()
# Add data processor statistics
if self._data_processor:
okx_status["data_processor_stats"] = self._data_processor.get_processing_stats()
return {**base_status, **okx_status}
# Add WebSocket statistics
if self._ws_client:
okx_status["websocket_stats"] = self._ws_client.get_stats()
# Merge with base status
base_status.update(okx_status)
return base_status
def __repr__(self) -> str:
return f"<OKXCollector(symbol={self.symbol}, status={self.status.value}, data_types={[dt.value for dt in self.data_types]})>"
"""String representation of the collector."""
return f"OKXCollector(symbol='{self.symbol}', status='{self.status.value}', data_types={[dt.value for dt in self.data_types]})"

View File

@@ -0,0 +1,726 @@
"""
OKX-specific data processing utilities.
This module provides OKX-specific data validation, transformation, and processing
utilities that extend the common data processing framework.
"""
import re
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, List, Optional, Any, Union, Tuple
from enum import Enum
from ...base_collector import DataType, MarketDataPoint
from ...common import (
DataValidationResult,
StandardizedTrade,
OHLCVCandle,
CandleProcessingConfig,
RealTimeCandleProcessor,
BaseDataValidator,
ValidationResult,
BaseDataTransformer,
UnifiedDataTransformer,
create_standardized_trade
)
from utils.logger import get_logger
class OKXMessageType(Enum):
    """OKX WebSocket message types, keyed by the wire-level discriminator."""
    # Market-data push message (carries 'arg' + 'data' keys).
    DATA = "data"
    # Acknowledgement events echoed back for (un)subscribe requests.
    SUBSCRIPTION_SUCCESS = "subscribe"
    UNSUBSCRIPTION_SUCCESS = "unsubscribe"
    # Error event reported by the exchange.
    ERROR = "error"
    # Heartbeat frames.
    PING = "ping"
    PONG = "pong"
class OKXTradeField(Enum):
    """OKX trade data field names (keys in a 'trades' channel payload)."""
    INST_ID = "instId"    # instrument id, e.g. "BTC-USDT"
    TRADE_ID = "tradeId"  # numeric trade identifier
    PRICE = "px"          # trade price (string-encoded number)
    SIZE = "sz"           # trade size (string-encoded number)
    SIDE = "side"         # taker side
    TIMESTAMP = "ts"      # epoch milliseconds (transformer passes is_milliseconds=True)
class OKXOrderbookField(Enum):
    """OKX orderbook data field names (keys in a books* channel payload)."""
    INST_ID = "instId"  # instrument id, e.g. "BTC-USDT"
    ASKS = "asks"       # ask side levels
    BIDS = "bids"       # bid side levels
    TIMESTAMP = "ts"    # epoch milliseconds
    SEQID = "seqId"     # optional sequence id for book continuity checks
class OKXTickerField(Enum):
    """OKX ticker data field names (keys in a 'tickers' channel payload)."""
    INST_ID = "instId"     # instrument id, e.g. "BTC-USDT"
    LAST = "last"          # last traded price
    LAST_SZ = "lastSz"     # last traded size
    ASK_PX = "askPx"       # best ask price
    ASK_SZ = "askSz"       # best ask size
    BID_PX = "bidPx"       # best bid price
    BID_SZ = "bidSz"       # best bid size
    OPEN_24H = "open24h"   # 24h open price
    HIGH_24H = "high24h"   # 24h high price
    LOW_24H = "low24h"     # 24h low price
    VOL_24H = "vol24h"     # 24h volume
    # NOTE(review): "volCcy24h" is OKX's quote-currency 24h volume; the enum
    # name suggests CNY — confirm intent before relying on the name.
    VOL_CNY_24H = "volCcy24h"
    TIMESTAMP = "ts"       # epoch milliseconds
class OKXDataValidator(BaseDataValidator):
    """
    OKX-specific data validator extending the common base validator.
    This class provides OKX-specific validation for message formats,
    symbol patterns, and data structures.

    Field-level checks (price, size, volume, timestamp, trade id, trade side,
    orderbook sides, symbol match) are delegated to BaseDataValidator helper
    methods; each is used here as returning a result object exposing
    is_valid / errors / warnings and, where applicable, sanitized_data.
    """

    def __init__(self, component_name: str = "okx_data_validator"):
        """Initialize OKX data validator.

        Args:
            component_name: Component name used for logging.
        """
        super().__init__("okx", component_name)
        # OKX-specific patterns
        self._symbol_pattern = re.compile(r'^[A-Z0-9]+-[A-Z0-9]+$')  # BTC-USDT, ETH-USDC
        self._trade_id_pattern = re.compile(r'^\d+$')  # OKX uses numeric trade IDs
        # OKX-specific valid channels; unknown channels produce warnings, not errors.
        self._valid_channels = {
            'trades', 'books5', 'books50', 'books-l2-tbt', 'tickers',
            'candle1m', 'candle5m', 'candle15m', 'candle1H', 'candle4H', 'candle1D'
        }
        self.logger.debug("Initialized OKX data validator")

    def validate_symbol_format(self, symbol: str) -> ValidationResult:
        """Validate OKX symbol format (e.g., BTC-USDT).

        Args:
            symbol: Candidate instrument id.

        Returns:
            ValidationResult; invalid when the value is not a string or does
            not match the BASE-QUOTE pattern.
        """
        errors = []
        warnings = []
        # Type check first: regex matching a non-string would raise.
        if not isinstance(symbol, str):
            errors.append(f"Symbol must be string, got {type(symbol)}")
            return ValidationResult(False, errors, warnings)
        if not self._symbol_pattern.match(symbol):
            errors.append(f"Invalid OKX symbol format: {symbol}. Expected format: BASE-QUOTE (e.g., BTC-USDT)")
        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_websocket_message(self, message: Dict[str, Any]) -> DataValidationResult:
        """Validate OKX WebSocket message structure.

        Identifies the message type from its keys and dispatches to the
        matching structural validator.

        Args:
            message: Decoded WebSocket message.

        Returns:
            DataValidationResult describing structural validity.
        """
        errors = []
        warnings = []
        try:
            # Check basic message structure
            if not isinstance(message, dict):
                errors.append(f"Message must be a dictionary, got {type(message)}")
                return DataValidationResult(False, errors, warnings)
            # Identify message type
            message_type = self._identify_message_type(message)
            if message_type == OKXMessageType.DATA:
                return self._validate_data_message(message)
            elif message_type in [OKXMessageType.SUBSCRIPTION_SUCCESS, OKXMessageType.UNSUBSCRIPTION_SUCCESS]:
                return self._validate_subscription_message(message)
            elif message_type == OKXMessageType.ERROR:
                return self._validate_error_message(message)
            elif message_type in [OKXMessageType.PING, OKXMessageType.PONG]:
                # NOTE(review): _identify_message_type never returns PING/PONG
                # (it falls back to DATA), so this branch is currently
                # unreachable — confirm whether ping/pong frames reach here.
                return DataValidationResult(True, [], [])  # Ping/pong are always valid
            else:
                warnings.append("Unknown message type, basic validation only")
                return DataValidationResult(True, [], warnings)
        except Exception as e:
            # Defensive: a validator must report failures, never raise.
            errors.append(f"Exception during message validation: {str(e)}")
            return DataValidationResult(False, errors, warnings)

    def validate_trade_data(self, data: Dict[str, Any], symbol: Optional[str] = None) -> DataValidationResult:
        """Validate OKX trade data structure and values.

        Args:
            data: Single trade record from a 'trades' channel payload.
            symbol: Expected instrument id; a mismatch yields warnings only.

        Returns:
            DataValidationResult with sanitized price/size (normalized string
            forms) when those fields validate.
        """
        errors = []
        warnings = []
        # Copy so sanitization never mutates the caller's dict.
        sanitized_data = data.copy()
        try:
            # Check required fields: every OKXTradeField key must be present.
            required_fields = [field.value for field in OKXTradeField]
            missing_fields = []
            for field in required_fields:
                if field not in data:
                    missing_fields.append(field)
            if missing_fields:
                # Short-circuit: field-level checks below assume presence.
                errors.extend([f"Missing required trade field: {field}" for field in missing_fields])
                return DataValidationResult(False, errors, warnings)
            # Validate individual fields using base validator methods
            symbol_result = self.validate_symbol_format(data[OKXTradeField.INST_ID.value])
            if not symbol_result.is_valid:
                errors.extend(symbol_result.errors)
            if symbol:
                # Symbol mismatch is downgraded to warnings (errors ignored).
                match_result = self.validate_symbol_match(data[OKXTradeField.INST_ID.value], symbol)
                warnings.extend(match_result.warnings)
            trade_id_result = self.validate_trade_id(data[OKXTradeField.TRADE_ID.value])
            if not trade_id_result.is_valid:
                errors.extend(trade_id_result.errors)
            warnings.extend(trade_id_result.warnings)
            price_result = self.validate_price(data[OKXTradeField.PRICE.value])
            if not price_result.is_valid:
                errors.extend(price_result.errors)
            else:
                # Keep sanitized values as strings for downstream Decimal parsing.
                sanitized_data[OKXTradeField.PRICE.value] = str(price_result.sanitized_data)
            warnings.extend(price_result.warnings)
            size_result = self.validate_size(data[OKXTradeField.SIZE.value])
            if not size_result.is_valid:
                errors.extend(size_result.errors)
            else:
                sanitized_data[OKXTradeField.SIZE.value] = str(size_result.sanitized_data)
            warnings.extend(size_result.warnings)
            side_result = self.validate_trade_side(data[OKXTradeField.SIDE.value])
            if not side_result.is_valid:
                errors.extend(side_result.errors)
            timestamp_result = self.validate_timestamp(data[OKXTradeField.TIMESTAMP.value])
            if not timestamp_result.is_valid:
                errors.extend(timestamp_result.errors)
            warnings.extend(timestamp_result.warnings)
            return DataValidationResult(len(errors) == 0, errors, warnings, sanitized_data)
        except Exception as e:
            errors.append(f"Exception during trade validation: {str(e)}")
            return DataValidationResult(False, errors, warnings)

    def validate_orderbook_data(self, data: Dict[str, Any], symbol: Optional[str] = None) -> DataValidationResult:
        """Validate OKX orderbook data structure and values.

        Args:
            data: Single orderbook record from a books* channel payload.
            symbol: Expected instrument id; a mismatch yields warnings only.

        Returns:
            DataValidationResult with sanitized asks/bids when those sides
            validate.
        """
        errors = []
        warnings = []
        sanitized_data = data.copy()
        try:
            # Check required fields (seqId is optional and checked separately).
            required_fields = [OKXOrderbookField.INST_ID.value, OKXOrderbookField.ASKS.value,
                               OKXOrderbookField.BIDS.value, OKXOrderbookField.TIMESTAMP.value]
            missing_fields = []
            for field in required_fields:
                if field not in data:
                    missing_fields.append(field)
            if missing_fields:
                errors.extend([f"Missing required orderbook field: {field}" for field in missing_fields])
                return DataValidationResult(False, errors, warnings)
            # Validate symbol
            symbol_result = self.validate_symbol_format(data[OKXOrderbookField.INST_ID.value])
            if not symbol_result.is_valid:
                errors.extend(symbol_result.errors)
            if symbol:
                match_result = self.validate_symbol_match(data[OKXOrderbookField.INST_ID.value], symbol)
                warnings.extend(match_result.warnings)
            # Validate timestamp
            timestamp_result = self.validate_timestamp(data[OKXOrderbookField.TIMESTAMP.value])
            if not timestamp_result.is_valid:
                errors.extend(timestamp_result.errors)
            warnings.extend(timestamp_result.warnings)
            # Validate asks and bids using base validator
            asks_result = self.validate_orderbook_side(data[OKXOrderbookField.ASKS.value], "asks")
            if not asks_result.is_valid:
                errors.extend(asks_result.errors)
            else:
                sanitized_data[OKXOrderbookField.ASKS.value] = asks_result.sanitized_data
            warnings.extend(asks_result.warnings)
            bids_result = self.validate_orderbook_side(data[OKXOrderbookField.BIDS.value], "bids")
            if not bids_result.is_valid:
                errors.extend(bids_result.errors)
            else:
                sanitized_data[OKXOrderbookField.BIDS.value] = bids_result.sanitized_data
            warnings.extend(bids_result.warnings)
            # Validate sequence ID if present
            if OKXOrderbookField.SEQID.value in data:
                seq_id = data[OKXOrderbookField.SEQID.value]
                # Accepts ints or digit-only strings.
                # NOTE(review): bool passes the isinstance(int) check here.
                if not isinstance(seq_id, (int, str)) or (isinstance(seq_id, str) and not seq_id.isdigit()):
                    errors.append("Invalid sequence ID format")
            return DataValidationResult(len(errors) == 0, errors, warnings, sanitized_data)
        except Exception as e:
            errors.append(f"Exception during orderbook validation: {str(e)}")
            return DataValidationResult(False, errors, warnings)

    def validate_ticker_data(self, data: Dict[str, Any], symbol: Optional[str] = None) -> DataValidationResult:
        """Validate OKX ticker data structure and values.

        Only instId, last and ts are required; all other price/size/volume
        fields are validated when present and non-empty.

        Args:
            data: Single ticker record from the 'tickers' channel payload.
            symbol: Expected instrument id; a mismatch yields warnings only.

        Returns:
            DataValidationResult with sanitized price/size fields when valid.
        """
        errors = []
        warnings = []
        sanitized_data = data.copy()
        try:
            # Check required fields
            required_fields = [OKXTickerField.INST_ID.value, OKXTickerField.LAST.value, OKXTickerField.TIMESTAMP.value]
            missing_fields = []
            for field in required_fields:
                if field not in data:
                    missing_fields.append(field)
            if missing_fields:
                errors.extend([f"Missing required ticker field: {field}" for field in missing_fields])
                return DataValidationResult(False, errors, warnings)
            # Validate symbol
            symbol_result = self.validate_symbol_format(data[OKXTickerField.INST_ID.value])
            if not symbol_result.is_valid:
                errors.extend(symbol_result.errors)
            if symbol:
                match_result = self.validate_symbol_match(data[OKXTickerField.INST_ID.value], symbol)
                warnings.extend(match_result.warnings)
            # Validate timestamp
            timestamp_result = self.validate_timestamp(data[OKXTickerField.TIMESTAMP.value])
            if not timestamp_result.is_valid:
                errors.extend(timestamp_result.errors)
            warnings.extend(timestamp_result.warnings)
            # Validate price fields (optional fields); errors/warnings are
            # prefixed with the field name to keep reports unambiguous.
            price_fields = [OKXTickerField.LAST, OKXTickerField.ASK_PX, OKXTickerField.BID_PX,
                            OKXTickerField.OPEN_24H, OKXTickerField.HIGH_24H, OKXTickerField.LOW_24H]
            for field in price_fields:
                if field.value in data and data[field.value] not in [None, ""]:
                    price_result = self.validate_price(data[field.value])
                    if not price_result.is_valid:
                        errors.extend([f"{field.value}: {error}" for error in price_result.errors])
                    else:
                        sanitized_data[field.value] = str(price_result.sanitized_data)
                    warnings.extend([f"{field.value}: {warning}" for warning in price_result.warnings])
            # Validate size fields (optional fields)
            size_fields = [OKXTickerField.LAST_SZ, OKXTickerField.ASK_SZ, OKXTickerField.BID_SZ]
            for field in size_fields:
                if field.value in data and data[field.value] not in [None, ""]:
                    size_result = self.validate_size(data[field.value])
                    if not size_result.is_valid:
                        errors.extend([f"{field.value}: {error}" for error in size_result.errors])
                    else:
                        sanitized_data[field.value] = str(size_result.sanitized_data)
                    warnings.extend([f"{field.value}: {warning}" for warning in size_result.warnings])
            # Validate volume fields (optional fields); volumes are checked
            # but not sanitized into the output dict.
            volume_fields = [OKXTickerField.VOL_24H, OKXTickerField.VOL_CNY_24H]
            for field in volume_fields:
                if field.value in data and data[field.value] not in [None, ""]:
                    volume_result = self.validate_volume(data[field.value])
                    if not volume_result.is_valid:
                        errors.extend([f"{field.value}: {error}" for error in volume_result.errors])
                    warnings.extend([f"{field.value}: {warning}" for warning in volume_result.warnings])
            return DataValidationResult(len(errors) == 0, errors, warnings, sanitized_data)
        except Exception as e:
            errors.append(f"Exception during ticker validation: {str(e)}")
            return DataValidationResult(False, errors, warnings)

    # Private helper methods for OKX-specific validation

    def _identify_message_type(self, message: Dict[str, Any]) -> OKXMessageType:
        """Identify the type of OKX WebSocket message from its keys.

        Event messages carry an 'event' key; data pushes carry 'arg' + 'data'.
        Anything else falls back to DATA.
        """
        if 'event' in message:
            event = message['event']
            if event == 'subscribe':
                return OKXMessageType.SUBSCRIPTION_SUCCESS
            elif event == 'unsubscribe':
                return OKXMessageType.UNSUBSCRIPTION_SUCCESS
            elif event == 'error':
                return OKXMessageType.ERROR
        if 'data' in message and 'arg' in message:
            return OKXMessageType.DATA
        # Default to data type for unknown messages
        return OKXMessageType.DATA

    def _validate_data_message(self, message: Dict[str, Any]) -> DataValidationResult:
        """Validate OKX data message structure ('arg' metadata + 'data' list)."""
        errors = []
        warnings = []
        # Check required fields
        if 'arg' not in message:
            errors.append("Missing 'arg' field in data message")
        if 'data' not in message:
            errors.append("Missing 'data' field in data message")
        if errors:
            return DataValidationResult(False, errors, warnings)
        # Validate arg structure
        arg = message['arg']
        if not isinstance(arg, dict):
            errors.append("'arg' field must be a dictionary")
        else:
            if 'channel' not in arg:
                errors.append("Missing 'channel' in arg")
            elif arg['channel'] not in self._valid_channels:
                # Unknown channels are tolerated so new OKX channels don't break us.
                warnings.append(f"Unknown channel: {arg['channel']}")
            if 'instId' not in arg:
                errors.append("Missing 'instId' in arg")
        # Validate data structure
        data = message['data']
        if not isinstance(data, list):
            errors.append("'data' field must be a list")
        elif len(data) == 0:
            warnings.append("Empty data array")
        return DataValidationResult(len(errors) == 0, errors, warnings)

    def _validate_subscription_message(self, message: Dict[str, Any]) -> DataValidationResult:
        """Validate subscription/unsubscription acknowledgement structure."""
        errors = []
        warnings = []
        if 'event' not in message:
            errors.append("Missing 'event' field")
        if 'arg' not in message:
            errors.append("Missing 'arg' field")
        return DataValidationResult(len(errors) == 0, errors, warnings)

    def _validate_error_message(self, message: Dict[str, Any]) -> DataValidationResult:
        """Validate error message; the exchange's text is surfaced as a warning."""
        errors = []
        warnings = []
        if 'event' not in message or message['event'] != 'error':
            errors.append("Invalid error message structure")
        if 'msg' in message:
            warnings.append(f"OKX error: {message['msg']}")
        return DataValidationResult(len(errors) == 0, errors, warnings)
class OKXDataTransformer(BaseDataTransformer):
    """
    OKX-specific data transformer extending the common base transformer.

    Converts raw OKX payload dicts (trades, orderbooks, tickers) into the
    standardized formats used by the common processing framework. Each
    transform returns None on failure after logging the error.
    """

    def __init__(self, component_name: str = "okx_data_transformer"):
        """Initialize OKX data transformer."""
        super().__init__("okx", component_name)

    def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[StandardizedTrade]:
        """Transform OKX trade data to standardized format."""
        try:
            fields = OKXTradeField
            # OKX timestamps are epoch milliseconds, hence is_milliseconds=True.
            return create_standardized_trade(
                symbol=raw_data[fields.INST_ID.value],
                trade_id=raw_data[fields.TRADE_ID.value],
                price=raw_data[fields.PRICE.value],
                size=raw_data[fields.SIZE.value],
                side=raw_data[fields.SIDE.value],
                timestamp=raw_data[fields.TIMESTAMP.value],
                exchange="okx",
                raw_data=raw_data,
                is_milliseconds=True
            )
        except Exception as e:
            self.logger.error(f"Error transforming OKX trade data: {e}")
            return None

    def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
        """Transform OKX orderbook data to standardized format."""
        try:
            # Basic pass-through of the book sides; only the timestamp is converted.
            book_time = self.timestamp_to_datetime(raw_data[OKXOrderbookField.TIMESTAMP.value])
            transformed = {
                'symbol': raw_data[OKXOrderbookField.INST_ID.value],
                'asks': raw_data[OKXOrderbookField.ASKS.value],
                'bids': raw_data[OKXOrderbookField.BIDS.value],
                'timestamp': book_time,
                'exchange': 'okx',
                'raw_data': raw_data
            }
            return transformed
        except Exception as e:
            self.logger.error(f"Error transforming OKX orderbook data: {e}")
            return None

    def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
        """Transform OKX ticker data to standardized format."""
        try:
            # Mandatory fields first; optional fields are merged in below.
            result: Dict[str, Any] = {
                'symbol': raw_data[OKXTickerField.INST_ID.value],
                'timestamp': self.timestamp_to_datetime(raw_data[OKXTickerField.TIMESTAMP.value]),
                'exchange': 'okx',
                'raw_data': raw_data
            }
            # (standard name, OKX field) pairs for the optional price fields.
            price_pairs = (
                ('last', OKXTickerField.LAST.value),
                ('bid', OKXTickerField.BID_PX.value),
                ('ask', OKXTickerField.ASK_PX.value),
                ('open_24h', OKXTickerField.OPEN_24H.value),
                ('high_24h', OKXTickerField.HIGH_24H.value),
                ('low_24h', OKXTickerField.LOW_24H.value)
            )
            for std_name, okx_name in price_pairs:
                candidate = raw_data.get(okx_name)
                if candidate in (None, ""):
                    continue
                converted = self.safe_decimal_conversion(candidate, std_name)
                # Truthiness check as before: failed conversions are dropped.
                if converted:
                    result[std_name] = converted
            # 24h volume is the only volume field carried through.
            vol_key = OKXTickerField.VOL_24H.value
            if vol_key in raw_data:
                vol = self.safe_decimal_conversion(raw_data[vol_key], 'volume_24h')
                if vol:
                    result['volume_24h'] = vol
            return result
        except Exception as e:
            self.logger.error(f"Error transforming OKX ticker data: {e}")
            return None
class OKXDataProcessor:
    """
    Main OKX data processor using common utilities.

    This class provides a simplified interface for OKX data processing,
    leveraging the common validation, transformation, and aggregation utilities.

    Responsibilities visible in this class:
      * validate raw OKX WebSocket messages (structure and per-channel payloads)
      * wrap validated payloads into MarketDataPoint objects
      * feed trades matching ``self.symbol`` into a real-time candle processor
        and fan trades/completed candles out to registered callbacks
    """
    def __init__(self,
                 symbol: str,
                 config: Optional[CandleProcessingConfig] = None,
                 component_name: str = "okx_data_processor"):
        """
        Initialize OKX data processor.

        Args:
            symbol: Trading symbol to process; only trades whose instId equals
                this symbol are fed into real-time candle processing.
            config: Candle processing configuration; a default
                CandleProcessingConfig is created when omitted.
            component_name: Name for logging; also used to derive the
                sub-component logger names (validator/transformer/candles).
        """
        self.symbol = symbol
        self.component_name = component_name
        self.logger = get_logger(self.component_name)
        # Core components using common utilities
        self.validator = OKXDataValidator(f"{component_name}_validator")
        self.transformer = OKXDataTransformer(f"{component_name}_transformer")
        self.unified_transformer = UnifiedDataTransformer(self.transformer, f"{component_name}_unified")
        # Real-time candle processing using common utilities
        self.config = config or CandleProcessingConfig()
        self.candle_processor = RealTimeCandleProcessor(
            symbol, "okx", self.config, f"{component_name}_candles"
        )
        # Consumer-registered callbacks; invoked synchronously, exceptions
        # inside a callback are caught and logged rather than propagated.
        self.trade_callbacks: List[callable] = []
        self.candle_callbacks: List[callable] = []
        # Connect candle processor callbacks so completed candles are fanned
        # out to candle_callbacks via _emit_candle_to_callbacks.
        self.candle_processor.add_candle_callback(self._emit_candle_to_callbacks)
        self.logger.info(f"Initialized OKX data processor for {symbol} with real-time candle processing")

    def add_trade_callback(self, callback: callable) -> None:
        """Add callback for processed trades (called with a standardized trade)."""
        self.trade_callbacks.append(callback)

    def add_candle_callback(self, callback: callable) -> None:
        """Add callback for completed candles (called with an OHLCVCandle)."""
        self.candle_callbacks.append(callback)

    # NOTE(review): `Tuple` is used in annotations below — confirm it is
    # imported from typing at the top of this module.
    def validate_and_process_message(self, message: Dict[str, Any], expected_symbol: Optional[str] = None) -> Tuple[bool, List[MarketDataPoint], List[str]]:
        """
        Validate and process complete OKX WebSocket message.

        This is the main entry point for real-time WebSocket data.

        Args:
            message: Complete WebSocket message from OKX
            expected_symbol: Expected trading symbol for validation

        Returns:
            Tuple of (success, list of market data points, list of errors).
            Non-data messages (no 'data'/'arg' keys) that pass structural
            validation return (True, [], []).
        """
        try:
            # First validate the message structure
            validation_result = self.validator.validate_websocket_message(message)
            if not validation_result.is_valid:
                self.logger.error(f"Message validation failed: {validation_result.errors}")
                return False, [], validation_result.errors
            # Log warnings if any
            if validation_result.warnings:
                self.logger.warning(f"Message validation warnings: {validation_result.warnings}")
            # Process data if it's a data message
            if 'data' in message and 'arg' in message:
                return self._process_data_message(message, expected_symbol)
            # Non-data messages are considered successfully processed but return no data points
            return True, [], []
        except Exception as e:
            error_msg = f"Exception during message validation and processing: {str(e)}"
            self.logger.error(error_msg)
            return False, [], [error_msg]

    def _process_data_message(self, message: Dict[str, Any], expected_symbol: Optional[str] = None) -> Tuple[bool, List[MarketDataPoint], List[str]]:
        """
        Process OKX data message and return market data points.

        Each item in the message's 'data' list is validated per channel type;
        invalid items are skipped and recorded in the returned error list.
        Success is True only when no item produced an error.
        """
        errors = []
        market_data_points = []
        try:
            arg = message['arg']
            channel = arg['channel']
            inst_id = arg['instId']
            data_list = message['data']
            # Determine data type from channel
            data_type = self._channel_to_data_type(channel)
            if not data_type:
                errors.append(f"Unsupported channel: {channel}")
                return False, [], errors
            # Process each data item
            for data_item in data_list:
                try:
                    # Validate and transform based on channel type
                    if channel == 'trades':
                        validation_result = self.validator.validate_trade_data(data_item, expected_symbol)
                    elif channel in ['books5', 'books50', 'books-l2-tbt']:
                        validation_result = self.validator.validate_orderbook_data(data_item, expected_symbol)
                    elif channel == 'tickers':
                        validation_result = self.validator.validate_ticker_data(data_item, expected_symbol)
                    else:
                        errors.append(f"Unsupported channel for validation: {channel}")
                        continue
                    if not validation_result.is_valid:
                        errors.extend(validation_result.errors)
                        continue
                    if validation_result.warnings:
                        self.logger.warning(f"Data validation warnings: {validation_result.warnings}")
                    # Create MarketDataPoint using sanitized data
                    sanitized_data = validation_result.sanitized_data or data_item
                    # 'ts' is an epoch-milliseconds string per OKX payloads;
                    # fall back to "now" when absent.
                    timestamp_ms = sanitized_data.get('ts')
                    if timestamp_ms:
                        timestamp = datetime.fromtimestamp(int(timestamp_ms) / 1000, tz=timezone.utc)
                    else:
                        timestamp = datetime.now(timezone.utc)
                    market_data_point = MarketDataPoint(
                        exchange="okx",
                        symbol=inst_id,
                        timestamp=timestamp,
                        data_type=data_type,
                        data=sanitized_data
                    )
                    market_data_points.append(market_data_point)
                    # Real-time processing for trades (only for this
                    # processor's configured symbol)
                    if channel == 'trades' and inst_id == self.symbol:
                        self._process_real_time_trade(sanitized_data)
                except Exception as e:
                    self.logger.error(f"Error processing data item: {e}")
                    errors.append(f"Error processing data item: {str(e)}")
            return len(errors) == 0, market_data_points, errors
        except Exception as e:
            error_msg = f"Exception during data message processing: {str(e)}"
            errors.append(error_msg)
            return False, [], errors

    def _process_real_time_trade(self, trade_data: Dict[str, Any]) -> None:
        """
        Process real-time trade for candle generation.

        Transforms the trade, feeds it to the candle processor, and invokes
        trade callbacks. Callback exceptions are logged and swallowed so one
        bad consumer cannot break the pipeline.
        """
        try:
            # Transform to standardized format using the unified transformer
            standardized_trade = self.unified_transformer.transform_trade_data(trade_data, self.symbol)
            if standardized_trade:
                # Process for real-time candles using common utilities
                completed_candles = self.candle_processor.process_trade(standardized_trade)
                # Emit trade to callbacks
                for callback in self.trade_callbacks:
                    try:
                        callback(standardized_trade)
                    except Exception as e:
                        self.logger.error(f"Error in trade callback: {e}")
                # Note: Candle callbacks are handled by _emit_candle_to_callbacks
        except Exception as e:
            self.logger.error(f"Error processing real-time trade: {e}")

    def _emit_candle_to_callbacks(self, candle: OHLCVCandle) -> None:
        """Emit candle to all registered callbacks (errors logged, not raised)."""
        for callback in self.candle_callbacks:
            try:
                callback(candle)
            except Exception as e:
                self.logger.error(f"Error in candle callback: {e}")

    def _channel_to_data_type(self, channel: str) -> Optional[DataType]:
        """Convert OKX channel name to DataType enum; None for unknown channels."""
        channel_mapping = {
            'trades': DataType.TRADE,
            'books5': DataType.ORDERBOOK,
            'books50': DataType.ORDERBOOK,
            'books-l2-tbt': DataType.ORDERBOOK,
            'tickers': DataType.TICKER
        }
        return channel_mapping.get(channel)

    def get_processing_stats(self) -> Dict[str, Any]:
        """Get comprehensive processing statistics (candles, callbacks, components)."""
        return {
            'candle_processor': self.candle_processor.get_stats(),
            'current_candles': self.candle_processor.get_current_candles(),
            'callbacks': {
                'trade_callbacks': len(self.trade_callbacks),
                'candle_callbacks': len(self.candle_callbacks)
            },
            'validator_info': self.validator.get_validator_info(),
            'transformer_info': self.unified_transformer.get_transformer_info()
        }
# Public API of this module: field enums plus the OKX validator,
# transformer, and the main processor facade.
__all__ = [
    'OKXMessageType',
    'OKXTradeField',
    'OKXOrderbookField',
    'OKXTickerField',
    'OKXDataValidator',
    'OKXDataTransformer',
    'OKXDataProcessor'
]