Vasily.onl f6cb1485b1 Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00

516 lines
21 KiB
Python

"""
OKX Data Collector implementation.
This module provides the main OKX data collector class that extends BaseDataCollector,
handling real-time market data collection for a single trading pair with robust
error handling, health monitoring, and database integration.
"""
import asyncio
from datetime import datetime, timezone
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from ...collector.base_collector import (
BaseDataCollector, DataType, CollectorStatus, MarketDataPoint,
OHLCVData, DataValidationError, ConnectionError
)
from ...common import (
StandardizedTrade, OHLCVCandle, CandleProcessingConfig
)
from .websocket import (
OKXWebSocketClient, OKXSubscription, OKXChannelType,
ConnectionState, OKXWebSocketError
)
from .data_processor import OKXDataProcessor
from database.operations import get_database_operations, DatabaseOperationError
from database.models import MarketData, RawTrade
@dataclass
class OKXMarketData:
"""OKX-specific market data structure."""
symbol: str
timestamp: datetime
data_type: str
channel: str
raw_data: Dict[str, Any]
class OKXCollector(BaseDataCollector):
"""
OKX data collector for real-time market data.
This collector handles a single trading pair and collects real-time data
including trades, orderbook, and ticker information from OKX exchange.
Uses the new common data processing framework for validation, transformation,
and aggregation.
"""
def __init__(self,
symbol: str,
data_types: Optional[List[DataType]] = None,
component_name: Optional[str] = None,
auto_restart: bool = True,
health_check_interval: float = 30.0,
store_raw_data: bool = True,
force_update_candles: bool = False,
timeframes: Optional[List[str]] = None,
candle_config: Optional[CandleProcessingConfig] = None,
logger = None,
log_errors_only: bool = False):
"""
Initialize OKX collector for a single trading pair.
Args:
symbol: Trading symbol (e.g., 'BTC-USDT')
data_types: Types of data to collect (default: [DataType.TRADE, DataType.ORDERBOOK])
component_name: Name for logging (default: f'okx_collector_{symbol}')
auto_restart: Enable automatic restart on failures
health_check_interval: Seconds between health checks
store_raw_data: Whether to store raw data for debugging
force_update_candles: If True, update existing candles; if False, keep existing candles unchanged
timeframes: List of timeframes to collect (e.g., ['1s', '5s', '1m'])
candle_config: Optional CandleProcessingConfig instance (will create one if not provided)
logger: Logger instance for conditional logging (None for no logging)
log_errors_only: If True and logger provided, only log error-level messages
"""
# Default data types if not specified
if data_types is None:
data_types = [DataType.TRADE, DataType.ORDERBOOK]
# Component name for logging
if component_name is None:
component_name = f"okx_collector_{symbol.replace('-', '_').lower()}"
# Initialize base collector
super().__init__(
exchange_name="okx",
symbols=[symbol],
data_types=data_types,
timeframes=timeframes, # Pass timeframes to base collector
component_name=component_name,
auto_restart=auto_restart,
health_check_interval=health_check_interval,
logger=logger,
log_errors_only=log_errors_only
)
# OKX-specific settings
self.symbol = symbol
self.store_raw_data = store_raw_data
self.force_update_candles = force_update_candles
# WebSocket client
self._ws_client: Optional[OKXWebSocketClient] = None
# Data processor using new common framework
self._data_processor = OKXDataProcessor(
symbol,
config=candle_config or CandleProcessingConfig(timeframes=self.timeframes), # Use provided config or create new one
component_name=f"{component_name}_processor",
logger=self.logger
)
# Add callbacks for processed data
self._data_processor.add_trade_callback(self._on_trade_processed)
self._data_processor.add_candle_callback(self._on_candle_processed)
# Database operations using new repository pattern
self._db_operations = None
# Data processing counters
self._message_count = 0
self._processed_trades = 0
self._processed_candles = 0
self._error_count = 0
# OKX channel mapping
self._channel_mapping = {
DataType.TRADE: OKXChannelType.TRADES.value,
DataType.ORDERBOOK: OKXChannelType.BOOKS5.value,
DataType.TICKER: OKXChannelType.TICKERS.value
}
self._log_info(f"{component_name}: Initialized OKX collector for {symbol} with data types: {[dt.value for dt in data_types]}")
self._log_info(f"{component_name}: Using timeframes: {self.timeframes}")
self._log_info(f"{component_name}: Using common data processing framework")
async def connect(self) -> bool:
"""
Establish connection to OKX WebSocket API.
Returns:
True if connection successful, False otherwise
"""
return await self._connection_manager.connect(self._actual_connect)
async def disconnect(self) -> None:
"""
Disconnect from OKX WebSocket API.
"""
await self._connection_manager.disconnect(self._actual_disconnect)
async def _actual_connect(self) -> bool:
"""
Implement the actual connection logic for OKX WebSocket API.
Returns:
True if connection successful, False otherwise
"""
try:
self._log_info(f"Connecting OKX collector for {self.symbol}")
# Initialize database operations using repository pattern
self._db_operations = get_database_operations(self.logger) # self.logger needs to be accessible for database operations
# Create WebSocket client
ws_component_name = f"okx_ws_{self.symbol.replace('-', '_').lower()}"
self._ws_client = OKXWebSocketClient(
component_name=ws_component_name,
ping_interval=25.0,
pong_timeout=10.0,
max_reconnect_attempts=5,
reconnect_delay=5.0,
logger=self.logger
)
# Add message callback
self._ws_client.add_message_callback(self._on_message)
# Connect to WebSocket
if not await self._ws_client.connect(use_public=True):
self._log_error(f"Failed to connect to OKX WebSocket")
return False
self._log_info(f"Successfully connected OKX collector for {self.symbol}")
return True
except Exception as e:
self._log_error(f"Error connecting OKX collector for {self.symbol}: {e}")
return False
async def _actual_disconnect(self) -> None:
"""
Implement the actual disconnection logic for OKX WebSocket API.
"""
try:
self._log_info(f"Disconnecting OKX collector for {self.symbol}")
if self._ws_client:
await self._ws_client.disconnect()
self._ws_client = None
self._log_info(f"Disconnected OKX collector for {self.symbol}")
except Exception as e:
self._log_error(f"Error disconnecting OKX collector for {self.symbol}: {e}")
async def subscribe_to_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
"""
Subscribe to data streams for specified symbols and data types.
Args:
symbols: Trading symbols to subscribe to (should contain self.symbol)
data_types: Types of data to subscribe to
Returns:
True if subscription successful, False otherwise
"""
if not self._ws_client or not self._ws_client.is_connected:
self._log_error(f"WebSocket client not connected")
return False
# Validate symbol
if self.symbol not in symbols:
self._log_warning(f"Symbol {self.symbol} not in subscription list: {symbols}")
return False
try:
# Build subscriptions
subscriptions = []
for data_type in data_types:
if data_type in self._channel_mapping:
channel = self._channel_mapping[data_type]
subscription = OKXSubscription(
channel=channel,
inst_id=self.symbol,
enabled=True
)
subscriptions.append(subscription)
self._log_debug(f"Added subscription: {channel} for {self.symbol}")
else:
self._log_warning(f"Unsupported data type: {data_type}")
if not subscriptions:
self._log_warning(f"No valid subscriptions to create")
return False
# Subscribe to channels
success = await self._ws_client.subscribe(subscriptions)
if success:
self._log_info(f"Successfully subscribed to {len(subscriptions)} channels for {self.symbol}")
return True
else:
self._log_error(f"Failed to subscribe to channels for {self.symbol}")
return False
except Exception as e:
self._log_error(f"Error subscribing to data for {self.symbol}: {e}")
return False
async def unsubscribe_from_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
"""
Unsubscribe from data streams for specified symbols and data types.
Args:
symbols: Trading symbols to unsubscribe from
data_types: Types of data to unsubscribe from
Returns:
True if unsubscription successful, False otherwise
"""
if not self._ws_client or not self._ws_client.is_connected:
self._log_warning(f"WebSocket client not connected")
return True # Consider it successful if not connected
try:
# Build unsubscription list
subscriptions = []
for data_type in data_types:
if data_type in self._channel_mapping:
channel = self._channel_mapping[data_type]
subscription = OKXSubscription(
channel=channel,
inst_id=self.symbol,
enabled=False # False for unsubscribe
)
subscriptions.append(subscription)
if not subscriptions:
return True
# Unsubscribe from channels
success = await self._ws_client.unsubscribe(subscriptions)
if success:
self._log_info(f"Successfully unsubscribed from {len(subscriptions)} channels for {self.symbol}")
return True
else:
self._log_error(f"Failed to unsubscribe from channels for {self.symbol}")
return False
except Exception as e:
self._log_error(f"Error unsubscribing from data for {self.symbol}: {e}")
return False
async def _process_message(self, message: Any) -> Optional[MarketDataPoint]:
"""
Process received message using the new data processor.
Args:
message: Raw message from WebSocket
Returns:
MarketDataPoint if processing successful, None otherwise
"""
if not isinstance(message, dict):
self._log_warning(f"Received non-dict message: {type(message)}")
return None
try:
self._message_count += 1
# Use the new data processor for validation and processing
success, market_data_points, errors = self._data_processor.validate_and_process_message(
message, expected_symbol=self.symbol
)
if not success:
self._error_count += 1
self._log_error(f"Message processing failed: {errors}")
return None
if errors:
self._log_warning(f"Message processing warnings: {errors}")
# Store raw data if enabled (for debugging/compliance)
if self.store_raw_data:
if 'data' in message and 'arg' in message:
await self._store_raw_data(message['arg'].get('channel', 'unknown'), message)
# Store processed market data points in raw_trades table
for data_point in market_data_points:
await self._store_processed_data(data_point)
# Return the first data point for compatibility (most use cases have single data point per message)
return market_data_points[0] if market_data_points else None
except Exception as e:
self._error_count += 1
self._log_error(f"Error processing message: {e}")
return None
async def _handle_messages(self) -> None:
"""
Handle message processing in the background.
This method exists for compatibility with BaseDataCollector
"""
# The new data processor handles messages through callbacks
# Update heartbeat to indicate the message loop is active
self._state_telemetry.update_heartbeat()
# Check if we're receiving WebSocket messages
if self._ws_client and self._ws_client.is_connected:
# Update last data received timestamp if WebSocket is connected and active
self._state_telemetry.update_data_received_timestamp()
# Short sleep to prevent busy loop while maintaining heartbeat
await asyncio.sleep(0.1)
async def _store_processed_data(self, data_point: MarketDataPoint) -> None:
"""
Store raw market data in the raw_trades table.
Args:
data_point: Raw market data point (trade, orderbook, ticker)
"""
try:
if not self._db_operations:
return
# Store raw market data points in raw_trades table using repository
success = self._db_operations.raw_trades.insert_market_data_point(data_point)
if success:
self._log_debug(f"Stored raw data: {data_point.data_type.value} for {data_point.symbol}")
except DatabaseOperationError as e:
self._log_error(f"Database error storing raw market data: {e}")
except Exception as e:
self._log_error(f"Error storing raw market data: {e}")
async def _store_completed_candle(self, candle: OHLCVCandle) -> None:
"""
Store completed OHLCV candle in the market_data table.
Handles duplicate candles based on force_update_candles setting:
- If force_update_candles=True: UPDATE existing records with latest values
- If force_update_candles=False: IGNORE duplicates, keep existing records unchanged
Args:
candle: Completed OHLCV candle
"""
try:
if not self._db_operations:
return
# Store completed candles using repository pattern
success = self._db_operations.market_data.upsert_candle(candle, self.force_update_candles)
if success:
action = "Updated" if self.force_update_candles else "Stored"
self._log_debug(f"{action} candle: {candle.symbol} {candle.timeframe} at {candle.end_time} (force_update={self.force_update_candles}) - OHLCV: {candle.open}/{candle.high}/{candle.low}/{candle.close}, Vol: {candle.volume}, Trades: {candle.trade_count}")
except DatabaseOperationError as e:
self._log_error(f"Database error storing completed candle: {e}")
# Log candle details for debugging
self._log_error(f"Failed candle details: {candle.symbol} {candle.timeframe} {candle.end_time} - OHLCV: {candle.open}/{candle.high}/{candle.low}/{candle.close}")
self._error_count += 1
except Exception as e:
self._log_error(f"Error storing completed candle: {e}")
# Log candle details for debugging
self._log_error(f"Failed candle details: {candle.symbol} {candle.timeframe} {candle.end_time} - OHLCV: {candle.open}/{candle.high}/{candle.low}/{candle.close}")
self._error_count += 1
async def _store_raw_data(self, channel: str, raw_message: Dict[str, Any]) -> None:
"""
Store raw WebSocket data for debugging in raw_trades table.
Args:
channel: Channel name
raw_message: Raw WebSocket message
"""
try:
if not self._db_operations or 'data' not in raw_message:
return
# Store each data item as a separate raw data record using repository
for data_item in raw_message['data']:
success = self._db_operations.raw_trades.insert_raw_websocket_data(
exchange="okx",
symbol=self.symbol,
data_type=f"raw_{channel}", # Prefix with 'raw_' to distinguish from processed data
raw_data=data_item,
timestamp=datetime.now(timezone.utc)
)
if not success:
self._log_warning(f"Failed to store raw WebSocket data for {channel}")
except DatabaseOperationError as e:
self._log_error(f"Database error storing raw WebSocket data: {e}")
except Exception as e:
self._log_error(f"Error storing raw WebSocket data: {e}")
def _on_message(self, message: Dict[str, Any]) -> None:
"""
Handle incoming WebSocket message.
Args:
message: WebSocket message from OKX
"""
try:
# Update heartbeat and data received timestamps
self._state_telemetry.update_heartbeat()
self._state_telemetry.update_data_received_timestamp()
self._message_count += 1
# Process message asynchronously
asyncio.create_task(self._process_message(message))
except Exception as e:
self._log_error(f"Error handling WebSocket message: {e}")
def _on_trade_processed(self, trade: StandardizedTrade) -> None:
"""
Callback for processed trades from data processor.
Args:
trade: Processed standardized trade
"""
self._processed_trades += 1
self._log_debug(f"Processed trade: {trade.symbol} {trade.side} {trade.size}@{trade.price}")
def _on_candle_processed(self, candle: OHLCVCandle) -> None:
"""
Callback for completed candles from data processor.
Args:
candle: Completed OHLCV candle
"""
self._processed_candles += 1
self._log_debug(f"Processed candle: {candle.symbol} {candle.timeframe}")
# Store completed candle in market_data table
if candle.is_complete:
asyncio.create_task(self._store_completed_candle(candle))
def get_status(self) -> Dict[str, Any]:
"""
Get current collector status and statistics.
Returns:
Dictionary containing status information
"""
return self._state_telemetry.get_status()
def get_health_status(self) -> Dict[str, Any]:
"""
Get detailed health status for monitoring.
Returns:
Dictionary containing health information
"""
return self._state_telemetry.get_health_status()
def __repr__(self) -> str:
"""String representation of the collector."""
return f"<{self.__class__.__name__}({self.exchange_name}, {len(self.symbols)} symbols, {self._state_telemetry.status.value})>"