Ajasra 90cb450640 Remove OKX configuration file and enhance data collector with timeframes support
- Deleted the `okx_config.json` file as part of the configuration refactor.
- Updated `BaseDataCollector` to include an optional `timeframes` parameter for more flexible data collection.
- Modified `DataCollectionService` and `OKXCollector` to pass and utilize the new `timeframes` parameter.
- Enhanced `ExchangeCollectorConfig` to validate timeframes, ensuring they are provided and correctly formatted.
- Updated documentation to reflect the new configurable timeframes feature, improving clarity for users.

These changes streamline the configuration process and improve the flexibility of data collection, aligning with project standards for maintainability and usability.
2025-06-07 15:46:24 +08:00

550 lines
23 KiB
Python

"""
OKX Data Collector implementation.
This module provides the main OKX data collector class that extends BaseDataCollector,
handling real-time market data collection for a single trading pair with robust
error handling, health monitoring, and database integration.
"""
import asyncio
from datetime import datetime, timezone
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from ...base_collector import (
BaseDataCollector, DataType, CollectorStatus, MarketDataPoint,
OHLCVData, DataValidationError, ConnectionError
)
from ...common import (
StandardizedTrade, OHLCVCandle, CandleProcessingConfig
)
from .websocket import (
OKXWebSocketClient, OKXSubscription, OKXChannelType,
ConnectionState, OKXWebSocketError
)
from .data_processor import OKXDataProcessor
from database.operations import get_database_operations, DatabaseOperationError
from database.models import MarketData, RawTrade
@dataclass
class OKXMarketData:
    """OKX-specific market data structure.

    A plain record pairing one raw OKX payload with the metadata needed to
    route it. Field order is part of the positional constructor signature
    generated by ``@dataclass`` and must not be rearranged.
    """

    # Instrument identifier the payload belongs to.
    symbol: str
    # Moment associated with the payload.
    timestamp: datetime
    # Free-form label describing the kind of data carried.
    data_type: str
    # Name of the channel the payload was received on.
    channel: str
    # The undecoded payload as a dictionary.
    raw_data: Dict[str, Any]
class OKXCollector(BaseDataCollector):
    """
    OKX data collector for real-time market data.

    This collector handles a single trading pair and collects real-time data
    including trades, orderbook, and ticker information from OKX exchange.
    Uses the new common data processing framework for validation, transformation,
    and aggregation.

    Message flow: the WebSocket client invokes ``_on_message`` for every
    incoming message; that callback schedules ``_process_message`` as a
    background task. ``_process_message`` runs the data processor, which in
    turn fires ``_on_trade_processed`` / ``_on_candle_processed``. Completed
    candles are persisted by a second background task.
    """

    def __init__(self,
                 symbol: str,
                 data_types: Optional[List[DataType]] = None,
                 component_name: Optional[str] = None,
                 auto_restart: bool = True,
                 health_check_interval: float = 30.0,
                 store_raw_data: bool = True,
                 force_update_candles: bool = False,
                 timeframes: Optional[List[str]] = None,
                 candle_config: Optional[CandleProcessingConfig] = None,
                 logger=None,
                 log_errors_only: bool = False):
        """
        Initialize OKX collector for a single trading pair.

        Args:
            symbol: Trading symbol (e.g., 'BTC-USDT')
            data_types: Types of data to collect (default: [DataType.TRADE, DataType.ORDERBOOK])
            component_name: Name for logging (default: f'okx_collector_{symbol}')
            auto_restart: Enable automatic restart on failures
            health_check_interval: Seconds between health checks
            store_raw_data: Whether to store raw data for debugging
            force_update_candles: If True, update existing candles; if False, keep existing candles unchanged
            timeframes: List of timeframes to collect (e.g., ['1s', '5s', '1m'])
            candle_config: Optional CandleProcessingConfig instance (will create one if not provided)
            logger: Logger instance for conditional logging (None for no logging)
            log_errors_only: If True and logger provided, only log error-level messages
        """
        # Default data types if not specified
        if data_types is None:
            data_types = [DataType.TRADE, DataType.ORDERBOOK]

        # Component name for logging
        if component_name is None:
            component_name = f"okx_collector_{symbol.replace('-', '_').lower()}"

        # Initialize base collector
        super().__init__(
            exchange_name="okx",
            symbols=[symbol],
            data_types=data_types,
            timeframes=timeframes,  # Pass timeframes to base collector
            component_name=component_name,
            auto_restart=auto_restart,
            health_check_interval=health_check_interval,
            logger=logger,
            log_errors_only=log_errors_only
        )

        # OKX-specific settings
        self.symbol = symbol
        self.store_raw_data = store_raw_data
        self.force_update_candles = force_update_candles

        # WebSocket client (created in connect(), cleared in disconnect())
        self._ws_client: Optional[OKXWebSocketClient] = None

        # Data processor using new common framework.
        # An explicit candle_config wins; otherwise one is built from the
        # timeframes exposed by the base collector.
        self._data_processor = OKXDataProcessor(
            symbol,
            config=candle_config or CandleProcessingConfig(timeframes=self.timeframes),
            component_name=f"{component_name}_processor",
            logger=logger
        )

        # Add callbacks for processed data
        self._data_processor.add_trade_callback(self._on_trade_processed)
        self._data_processor.add_candle_callback(self._on_candle_processed)

        # Database operations using new repository pattern (set in connect())
        self._db_operations = None

        # Strong references to fire-and-forget tasks. The event loop only
        # keeps weak references, so an unreferenced task may be garbage
        # collected before it finishes.
        self._background_tasks: set = set()

        # Data processing counters
        self._message_count = 0
        self._processed_trades = 0
        self._processed_candles = 0
        self._error_count = 0

        # OKX channel mapping
        self._channel_mapping = {
            DataType.TRADE: OKXChannelType.TRADES.value,
            DataType.ORDERBOOK: OKXChannelType.BOOKS5.value,
            DataType.TICKER: OKXChannelType.TICKERS.value
        }

        if logger:
            logger.info(f"{component_name}: Initialized OKX collector for {symbol} with data types: {[dt.value for dt in data_types]}")
            logger.info(f"{component_name}: Using timeframes: {self.timeframes}")
            logger.info(f"{component_name}: Using common data processing framework")

    def _spawn_background_task(self, coro) -> "asyncio.Task":
        """
        Schedule a coroutine on the running loop and keep it referenced until done.

        Args:
            coro: Coroutine object to run in the background

        Returns:
            The created task

        Raises:
            RuntimeError: If there is no running event loop
        """
        try:
            task = asyncio.create_task(coro)
        except RuntimeError:
            # No running loop: close the coroutine so it does not emit a
            # "coroutine was never awaited" warning, then let the caller decide.
            coro.close()
            raise
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    async def connect(self) -> bool:
        """
        Establish connection to OKX WebSocket API.

        Also initializes the database operations handle. If a WebSocket
        client from an earlier attempt is still referenced, it is
        disconnected first so it is not orphaned by the new client.

        Returns:
            True if connection successful, False otherwise
        """
        try:
            if self.logger:
                self.logger.info(f"{self.component_name}: Connecting OKX collector for {self.symbol}")

            # Close any client left over from a previous connect() call
            if self._ws_client is not None:
                await self.disconnect()

            # Initialize database operations using repository pattern
            self._db_operations = get_database_operations(self.logger)

            # Create WebSocket client
            ws_component_name = f"okx_ws_{self.symbol.replace('-', '_').lower()}"
            self._ws_client = OKXWebSocketClient(
                component_name=ws_component_name,
                ping_interval=25.0,
                pong_timeout=10.0,
                max_reconnect_attempts=5,
                reconnect_delay=5.0,
                logger=self.logger  # Pass the logger to enable ping/pong logging
            )

            # Add message callback
            self._ws_client.add_message_callback(self._on_message)

            # Connect to WebSocket
            if not await self._ws_client.connect(use_public=True):
                if self.logger:
                    self.logger.error(f"{self.component_name}: Failed to connect to OKX WebSocket")
                return False

            if self.logger:
                self.logger.info(f"{self.component_name}: Successfully connected OKX collector for {self.symbol}")
            return True

        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error connecting OKX collector for {self.symbol}: {e}")
            return False

    async def disconnect(self) -> None:
        """
        Disconnect from OKX WebSocket API.

        The client reference is cleared before awaiting its shutdown, so the
        collector never keeps a half-closed client even when shutdown raises.
        Errors are logged, not propagated.
        """
        try:
            if self.logger:
                self.logger.info(f"{self.component_name}: Disconnecting OKX collector for {self.symbol}")

            client, self._ws_client = self._ws_client, None
            if client:
                await client.disconnect()

            if self.logger:
                self.logger.info(f"{self.component_name}: Disconnected OKX collector for {self.symbol}")

        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error disconnecting OKX collector for {self.symbol}: {e}")

    async def subscribe_to_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
        """
        Subscribe to data streams for specified symbols and data types.

        Args:
            symbols: Trading symbols to subscribe to (should contain self.symbol)
            data_types: Types of data to subscribe to

        Returns:
            True if subscription successful, False otherwise (not connected,
            self.symbol missing from symbols, no supported data type, or the
            client reported failure)
        """
        if not self._ws_client or not self._ws_client.is_connected:
            if self.logger:
                self.logger.error(f"{self.component_name}: WebSocket client not connected")
            return False

        # Validate symbol
        if self.symbol not in symbols:
            if self.logger:
                self.logger.warning(f"{self.component_name}: Symbol {self.symbol} not in subscription list: {symbols}")
            return False

        try:
            # Build subscriptions
            subscriptions = []
            for data_type in data_types:
                if data_type in self._channel_mapping:
                    channel = self._channel_mapping[data_type]
                    subscription = OKXSubscription(
                        channel=channel,
                        inst_id=self.symbol,
                        enabled=True
                    )
                    subscriptions.append(subscription)
                    if self.logger:
                        self.logger.debug(f"{self.component_name}: Added subscription: {channel} for {self.symbol}")
                else:
                    if self.logger:
                        self.logger.warning(f"{self.component_name}: Unsupported data type: {data_type}")

            if not subscriptions:
                if self.logger:
                    self.logger.warning(f"{self.component_name}: No valid subscriptions to create")
                return False

            # Subscribe to channels
            success = await self._ws_client.subscribe(subscriptions)
            if success:
                if self.logger:
                    self.logger.info(f"{self.component_name}: Successfully subscribed to {len(subscriptions)} channels for {self.symbol}")
                return True
            else:
                if self.logger:
                    self.logger.error(f"{self.component_name}: Failed to subscribe to channels for {self.symbol}")
                return False

        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error subscribing to data for {self.symbol}: {e}")
            return False

    async def unsubscribe_from_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
        """
        Unsubscribe from data streams for specified symbols and data types.

        Args:
            symbols: Trading symbols to unsubscribe from. Not inspected here:
                the collector always targets self.symbol.
            data_types: Types of data to unsubscribe from

        Returns:
            True if unsubscription successful (or there was nothing to do),
            False otherwise
        """
        if not self._ws_client or not self._ws_client.is_connected:
            if self.logger:
                self.logger.warning(f"{self.component_name}: WebSocket client not connected")
            return True  # Consider it successful if not connected

        try:
            # Build unsubscription list; unsupported data types are skipped silently
            subscriptions = []
            for data_type in data_types:
                if data_type in self._channel_mapping:
                    channel = self._channel_mapping[data_type]
                    subscription = OKXSubscription(
                        channel=channel,
                        inst_id=self.symbol,
                        enabled=False  # False for unsubscribe
                    )
                    subscriptions.append(subscription)

            if not subscriptions:
                return True

            # Unsubscribe from channels
            success = await self._ws_client.unsubscribe(subscriptions)
            if success:
                if self.logger:
                    self.logger.info(f"{self.component_name}: Successfully unsubscribed from {len(subscriptions)} channels for {self.symbol}")
                return True
            else:
                if self.logger:
                    self.logger.error(f"{self.component_name}: Failed to unsubscribe from channels for {self.symbol}")
                return False

        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error unsubscribing from data for {self.symbol}: {e}")
            return False

    async def _process_message(self, message: Any) -> Optional[MarketDataPoint]:
        """
        Process received message using the new data processor.

        This is the single place where received messages are counted, so each
        message contributes exactly once to the "messages_received" statistic.

        Args:
            message: Raw message from WebSocket

        Returns:
            MarketDataPoint if processing successful, None otherwise
        """
        self._message_count += 1

        if not isinstance(message, dict):
            if self.logger:
                self.logger.warning(f"{self.component_name}: Received non-dict message: {type(message)}")
            return None

        try:
            # Use the new data processor for validation and processing
            success, market_data_points, errors = self._data_processor.validate_and_process_message(
                message, expected_symbol=self.symbol
            )

            if not success:
                self._error_count += 1
                if self.logger:
                    self.logger.error(f"{self.component_name}: Message processing failed: {errors}")
                return None

            # Success with a non-empty error list is treated as warnings
            if errors:
                if self.logger:
                    self.logger.warning(f"{self.component_name}: Message processing warnings: {errors}")

            # Store raw data if enabled (for debugging/compliance)
            if self.store_raw_data:
                if 'data' in message and 'arg' in message:
                    await self._store_raw_data(message['arg'].get('channel', 'unknown'), message)

            # Store processed market data points in raw_trades table
            for data_point in market_data_points:
                await self._store_processed_data(data_point)

            # Return the first data point for compatibility (most use cases have single data point per message)
            return market_data_points[0] if market_data_points else None

        except Exception as e:
            self._error_count += 1
            if self.logger:
                self.logger.error(f"{self.component_name}: Error processing message: {e}")
            return None

    async def _handle_messages(self) -> None:
        """
        Handle message processing in the background.

        Messages are delivered through WebSocket callbacks, so this method
        only refreshes the heartbeat. While the WebSocket reports connected
        it also refreshes the last-data-received timestamp, whether or not a
        message actually arrived.
        """
        # The new data processor handles messages through callbacks
        # This method exists for compatibility with BaseDataCollector

        # Update heartbeat to indicate the message loop is active
        self._last_heartbeat = datetime.now(timezone.utc)

        # Check if we're receiving WebSocket messages
        if self._ws_client and self._ws_client.is_connected:
            # Update last data received timestamp if WebSocket is connected and active
            self._last_data_received = datetime.now(timezone.utc)

        # Short sleep to prevent busy loop while maintaining heartbeat
        await asyncio.sleep(0.1)

    async def _store_processed_data(self, data_point: MarketDataPoint) -> None:
        """
        Store raw market data in the raw_trades table.

        Does nothing when database operations are not initialized. Failures
        are logged and swallowed.

        Args:
            data_point: Raw market data point (trade, orderbook, ticker)
        """
        try:
            if not self._db_operations:
                return

            # Store raw market data points in raw_trades table using repository
            success = self._db_operations.raw_trades.insert_market_data_point(data_point)
            if success and self.logger:
                self.logger.debug(f"{self.component_name}: Stored raw data: {data_point.data_type.value} for {data_point.symbol}")

        except DatabaseOperationError as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Database error storing raw market data: {e}")
        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error storing raw market data: {e}")

    async def _store_completed_candle(self, candle: OHLCVCandle) -> None:
        """
        Store completed OHLCV candle in the market_data table.

        Handles duplicate candles based on force_update_candles setting:
        - If force_update_candles=True: UPDATE existing records with latest values
        - If force_update_candles=False: IGNORE duplicates, keep existing records unchanged

        Failures are logged, counted in the error counter, and swallowed.

        Args:
            candle: Completed OHLCV candle
        """
        try:
            if not self._db_operations:
                return

            # Store completed candles using repository pattern
            success = self._db_operations.market_data.upsert_candle(candle, self.force_update_candles)
            if success and self.logger:
                action = "Updated" if self.force_update_candles else "Stored"
                self.logger.debug(f"{self.component_name}: {action} candle: {candle.symbol} {candle.timeframe} at {candle.end_time} (force_update={self.force_update_candles}) - OHLCV: {candle.open}/{candle.high}/{candle.low}/{candle.close}, Vol: {candle.volume}, Trades: {candle.trade_count}")

        except DatabaseOperationError as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Database error storing completed candle: {e}")
                # Log candle details for debugging
                self.logger.error(f"{self.component_name}: Failed candle details: {candle.symbol} {candle.timeframe} {candle.end_time} - OHLCV: {candle.open}/{candle.high}/{candle.low}/{candle.close}")
            self._error_count += 1
        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error storing completed candle: {e}")
                # Log candle details for debugging
                self.logger.error(f"{self.component_name}: Failed candle details: {candle.symbol} {candle.timeframe} {candle.end_time} - OHLCV: {candle.open}/{candle.high}/{candle.low}/{candle.close}")
            self._error_count += 1

    async def _store_raw_data(self, channel: str, raw_message: Dict[str, Any]) -> None:
        """
        Store raw WebSocket data for debugging in raw_trades table.

        Each element of raw_message['data'] is written as its own record,
        stamped with the current UTC time. Failures are logged and swallowed.

        Args:
            channel: Channel name
            raw_message: Raw WebSocket message
        """
        try:
            if not self._db_operations or 'data' not in raw_message:
                return

            # Store each data item as a separate raw data record using repository
            for data_item in raw_message['data']:
                success = self._db_operations.raw_trades.insert_raw_websocket_data(
                    exchange="okx",
                    symbol=self.symbol,
                    data_type=f"raw_{channel}",  # Prefix with 'raw_' to distinguish from processed data
                    raw_data=data_item,
                    timestamp=datetime.now(timezone.utc)
                )
                if not success and self.logger:
                    self.logger.warning(f"{self.component_name}: Failed to store raw WebSocket data for {channel}")

        except DatabaseOperationError as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Database error storing raw WebSocket data: {e}")
        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error storing raw WebSocket data: {e}")

    def _on_message(self, message: Dict[str, Any]) -> None:
        """
        Handle incoming WebSocket message.

        Refreshes the heartbeat and data-received timestamps, then schedules
        asynchronous processing. The message counter is not touched here;
        _process_message counts each message once.

        Args:
            message: WebSocket message from OKX
        """
        try:
            # Update heartbeat and data received timestamps
            current_time = datetime.now(timezone.utc)
            self._last_heartbeat = current_time
            self._last_data_received = current_time

            # Process message asynchronously, keeping the task referenced
            self._spawn_background_task(self._process_message(message))

        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error handling WebSocket message: {e}")

    def _on_trade_processed(self, trade: StandardizedTrade) -> None:
        """
        Callback for processed trades from data processor.

        Args:
            trade: Processed standardized trade
        """
        self._processed_trades += 1
        if self.logger:
            self.logger.debug(f"{self.component_name}: Processed trade: {trade.symbol} {trade.side} {trade.size}@{trade.price}")

    def _on_candle_processed(self, candle: OHLCVCandle) -> None:
        """
        Callback for completed candles from data processor.

        Every candle is counted and logged; only candles flagged complete
        are scheduled for storage.

        Args:
            candle: Completed OHLCV candle
        """
        self._processed_candles += 1
        if self.logger:
            self.logger.debug(f"{self.component_name}: Completed candle: {candle.symbol} {candle.timeframe} O:{candle.open} H:{candle.high} L:{candle.low} C:{candle.close} V:{candle.volume}")

        # Store completed candle in market_data table
        if candle.is_complete:
            self._spawn_background_task(self._store_completed_candle(candle))

    def get_status(self) -> Dict[str, Any]:
        """
        Get current collector status including processing statistics.

        Returns:
            Dictionary containing the base collector status merged with
            OKX-specific fields (OKX keys win on conflict)
        """
        base_status = super().get_status()

        # Add OKX-specific status
        okx_status = {
            "symbol": self.symbol,
            "websocket_connected": self._ws_client.is_connected if self._ws_client else False,
            "websocket_state": self._ws_client.connection_state.value if self._ws_client else "disconnected",
            "store_raw_data": self.store_raw_data,
            "force_update_candles": self.force_update_candles,
            "timeframes": self.timeframes,
            "processing_stats": {
                "messages_received": self._message_count,
                "trades_processed": self._processed_trades,
                "candles_processed": self._processed_candles,
                "errors": self._error_count
            }
        }

        # Add data processor statistics
        if self._data_processor:
            okx_status["data_processor_stats"] = self._data_processor.get_processing_stats()

        # Add WebSocket statistics
        if self._ws_client:
            okx_status["websocket_stats"] = self._ws_client.get_stats()

        # Merge with base status
        base_status.update(okx_status)
        return base_status

    def __repr__(self) -> str:
        """String representation of the collector."""
        return f"OKXCollector(symbol='{self.symbol}', status='{self.status.value}', data_types={[dt.value for dt in self.data_types]})"