"""
Abstract base class for data collectors.

This module provides a common interface for all data collection implementations,
ensuring consistency across different exchange connectors and data sources.
"""

import asyncio
from abc import ABC, abstractmethod
from datetime import datetime, timezone, timedelta
from decimal import Decimal
from typing import Dict, List, Optional, Any, Callable, Set
from dataclasses import dataclass
from enum import Enum

from utils.logger import get_logger
from .collector.collector_state_telemetry import CollectorStatus, CollectorStateAndTelemetry
from .collector.collector_connection_manager import ConnectionManager
from .collector.collector_callback_dispatcher import CallbackDispatcher
from .common.data_types import DataType, MarketDataPoint
from .common.ohlcv_data import OHLCVData, DataValidationError, validate_ohlcv_data


class DataCollectorError(Exception):
    """Base exception for data collector errors."""
    pass


# Note: this name shadows Python's builtin ConnectionError within this module,
# so code importing it from here gets the collector-specific exception.
class ConnectionError(DataCollectorError):
    """Exception raised when connection to data source fails."""
    pass


class BaseDataCollector(ABC):
    """
    Abstract base class for all data collectors.

    This class defines the interface that all data collection implementations
    must follow, providing consistency across different exchanges and data sources.
    """

    def __init__(self,
                 exchange_name: str,
                 symbols: List[str],
                 data_types: Optional[List[DataType]] = None,
                 timeframes: Optional[List[str]] = None,
                 component_name: Optional[str] = None,
                 auto_restart: bool = True,
                 health_check_interval: float = 30.0,
                 logger=None,
                 log_errors_only: bool = False):
        """
        Initialize the base data collector.

        Args:
            exchange_name: Name of the exchange (e.g., 'okx', 'binance')
            symbols: List of trading symbols to collect data for
            data_types: Types of data to collect (default: [DataType.CANDLE])
            timeframes: List of timeframes to collect (e.g., ['1s', '1m', '5m'])
            component_name: Name for logging (default: based on exchange_name)
            auto_restart: Enable automatic restart on failures (default: True)
            health_check_interval: Seconds between health checks (default: 30.0)
            logger: Logger instance. If None, a default logger is created via get_logger().
            log_errors_only: If True, only error-level messages are logged
        """
        self.exchange_name = exchange_name.lower()
        self.symbols = set(symbols)
        self.data_types = data_types or [DataType.CANDLE]
        self.timeframes = timeframes or ['1m', '5m']  # Default timeframes if none provided
        self.auto_restart = auto_restart

        # Initialize logger: use the provided instance or fall back to a default
        if logger is not None:
            self.logger = logger
        else:
            self.logger = get_logger(self.exchange_name)  # Ensure a logger is always available

        # Initialize state and telemetry manager
        component = component_name or f"{self.exchange_name}_collector"
        self._state_telemetry = CollectorStateAndTelemetry(
            exchange_name=self.exchange_name,
            component_name=component,
            health_check_interval=health_check_interval,
            logger=self.logger,  # Pass the actual logger instance
            log_errors_only=log_errors_only
        )
        self.component_name = component  # Keep for external access

        # Initialize connection manager
        self._connection_manager = ConnectionManager(
            exchange_name=self.exchange_name,
            component_name=component,
            max_reconnect_attempts=5,  # Default, can be made configurable later
            reconnect_delay=5.0,       # Default, can be made configurable later
            logger=self.logger,
            state_telemetry=self._state_telemetry
        )

        # Initialize callback dispatcher
        self._callback_dispatcher = CallbackDispatcher(
            component_name=component,
            logger=self.logger
        )

        # Collector state (now managed by _state_telemetry)
        self._tasks: Set[asyncio.Task] = set()

        # Log initialization unless the collector is restricted to error-level logging
        if self._state_telemetry.logger and not self._state_telemetry.log_errors_only:
            self._state_telemetry._log_info(
                f"{self.component_name}: Initialized {self.exchange_name} data collector "
                f"for symbols: {', '.join(symbols)}"
            )
            self._state_telemetry._log_info(
                f"{self.component_name}: Using timeframes: {', '.join(self.timeframes)}"
            )

    @property
    def status(self) -> CollectorStatus:
        return self._state_telemetry.status

    def _log_debug(self, message: str) -> None:
        self._state_telemetry._log_debug(message)

    def _log_info(self, message: str) -> None:
        self._state_telemetry._log_info(message)

    def _log_warning(self, message: str) -> None:
        self._state_telemetry._log_warning(message)

    def _log_error(self, message: str, exc_info: bool = False) -> None:
        self._state_telemetry._log_error(message, exc_info=exc_info)

    def _log_critical(self, message: str, exc_info: bool = False) -> None:
        self._state_telemetry._log_critical(message, exc_info=exc_info)

    @abstractmethod
    async def connect(self) -> bool:
        """
        Establish connection to the data source.

        Returns:
            True if connection successful, False otherwise
        """
        pass

    @abstractmethod
    async def disconnect(self) -> None:
        """Disconnect from the data source."""
        pass

    @abstractmethod
    async def subscribe_to_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
        """
        Subscribe to data streams for specified symbols and data types.

        Args:
            symbols: Trading symbols to subscribe to
            data_types: Types of data to subscribe to

        Returns:
            True if subscription successful, False otherwise
        """
        pass

    @abstractmethod
    async def unsubscribe_from_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
        """
        Unsubscribe from data streams.

        Args:
            symbols: Trading symbols to unsubscribe from
            data_types: Types of data to unsubscribe from

        Returns:
            True if unsubscription successful, False otherwise
        """
        pass

    @abstractmethod
    async def _process_message(self, message: Any) -> Optional[MarketDataPoint]:
        """
        Process incoming message from the data source.

        Args:
            message: Raw message from the data source

        Returns:
            Processed MarketDataPoint or None if message should be ignored
        """
        pass

    async def start(self) -> bool:
        """
        Start the data collector.

        Returns:
            True if started successfully, False otherwise
        """
        # Check if already running or starting
        if self._state_telemetry.status in [CollectorStatus.RUNNING, CollectorStatus.STARTING]:
            self._log_warning("Data collector is already running or starting")
            return True

        self._log_info(f"Starting {self.exchange_name} data collector")
        self._state_telemetry.update_status(CollectorStatus.STARTING)
        self._state_telemetry.set_should_be_running(True)

        try:
            # Connect to data source
            if not await self._connection_manager.connect(self._actual_connect):
                self._log_error("Failed to connect to data source")
                self._state_telemetry.update_status(CollectorStatus.ERROR)
                return False

            # Subscribe to data streams
            if not await self.subscribe_to_data(list(self.symbols), self.data_types):
                self._log_error("Failed to subscribe to data streams")
                self._state_telemetry.update_status(CollectorStatus.ERROR)
                await self._connection_manager.disconnect(self._actual_disconnect)
                return False

            # Start background tasks
            self._state_telemetry.set_running_state(True)
            self._state_telemetry.update_status(CollectorStatus.RUNNING)

            # Start message processing task
            message_task = asyncio.create_task(self._message_loop())
            self._tasks.add(message_task)
            message_task.add_done_callback(self._tasks.discard)

            # Start health monitoring task
            if self._state_telemetry.health_check_interval > 0:
                health_task = asyncio.create_task(self._health_monitor())
                self._tasks.add(health_task)
                health_task.add_done_callback(self._tasks.discard)

            self._log_info(f"{self.exchange_name} data collector started successfully")
            return True

        except Exception as e:
            self._log_error(f"Failed to start data collector: {e}")
            self._state_telemetry.update_status(CollectorStatus.ERROR)
            self._state_telemetry.set_should_be_running(False)
            return False

    async def stop(self, force: bool = False) -> None:
        """
        Stop the data collector and clean up resources.

        Args:
            force: If True, cancel background tasks without awaiting their completion
        """
        if self._state_telemetry.status == CollectorStatus.STOPPED:
            self._log_warning("Data collector is already stopped")
            return

        self._log_info(f"Stopping {self.exchange_name} data collector")
        self._state_telemetry.update_status(CollectorStatus.STOPPING)
        self._state_telemetry.set_should_be_running(False)

        try:
            # Stop background tasks
            self._state_telemetry.set_running_state(False)

            # Cancel all tasks
            for task in list(self._tasks):
                if not task.done():
                    task.cancel()
                    if not force:
                        try:
                            await task
                        except asyncio.CancelledError:
                            pass

            self._tasks.clear()

            # Unsubscribe and disconnect
            await self.unsubscribe_from_data(list(self.symbols), self.data_types)
            await self._connection_manager.disconnect(self._actual_disconnect)

            self._state_telemetry.update_status(CollectorStatus.STOPPED)
            self._log_info(f"{self.exchange_name} data collector stopped")

        except Exception as e:
            self._log_error(f"Error stopping data collector: {e}")
            self._state_telemetry.update_status(CollectorStatus.ERROR)

    async def restart(self) -> bool:
        """
        Restart the data collector.

        Returns:
            True if restarted successfully, False otherwise
        """
        self._log_info(f"Restarting {self.exchange_name} data collector")
        self._state_telemetry.increment_restarts()

        # Stop first
        await self.stop()

        # Wait a bit before restarting
        await asyncio.sleep(self._connection_manager._reconnect_delay)

        # Start again
        return await self.start()

    async def _message_loop(self) -> None:
        """Main message processing loop."""
        try:
            self._log_debug("Starting message processing loop")

            while self._state_telemetry._running:
                try:
                    await self._handle_messages()
                except asyncio.CancelledError:
                    break
                except Exception as e:
                    self._state_telemetry.increment_errors(str(e))
                    self._log_error(f"Error processing messages: {e}")

                    # Small delay to prevent tight error loops
                    await asyncio.sleep(0.1)

        except asyncio.CancelledError:
            self._log_debug("Message loop cancelled")
            raise
        except Exception as e:
            self._log_error(f"Error in message loop: {e}")
            self._state_telemetry.update_status(CollectorStatus.ERROR)

    async def _health_monitor(self) -> None:
        """Monitor collector health and restart if needed."""
        try:
            self._log_debug("Starting health monitor")

            while self._state_telemetry._running:
                try:
                    await asyncio.sleep(self._state_telemetry.health_check_interval)

                    # Check if collector should be running but isn't
                    if self._state_telemetry._should_be_running and self._state_telemetry.status != CollectorStatus.RUNNING:
                        self._log_warning("Collector should be running but isn't - restarting")
                        if self.auto_restart:
                            asyncio.create_task(self.restart())
                        continue

                    # Check heartbeat
                    time_since_heartbeat = datetime.now(timezone.utc) - self._state_telemetry._last_heartbeat
                    if time_since_heartbeat > timedelta(seconds=self._state_telemetry.health_check_interval * 2):
                        self._log_warning(f"No heartbeat for {time_since_heartbeat.total_seconds():.1f}s - restarting")
                        if self.auto_restart:
                            asyncio.create_task(self.restart())
                        continue

                    # Check data reception
                    if self._state_telemetry._last_data_received:
                        time_since_data = datetime.now(timezone.utc) - self._state_telemetry._last_data_received
                        if time_since_data > self._state_telemetry._max_silence_duration:
                            self._log_warning(f"No data received for {time_since_data.total_seconds():.1f}s - restarting")
                            if self.auto_restart:
                                asyncio.create_task(self.restart())
                            continue

                    # Check for error status
                    if self._state_telemetry.status == CollectorStatus.ERROR:
                        self._log_warning(f"Collector in {self._state_telemetry.status.value} status - restarting")
                        if self.auto_restart:
                            asyncio.create_task(self.restart())

                except asyncio.CancelledError:
                    break

        except asyncio.CancelledError:
            self._log_debug("Health monitor cancelled")
            raise
        except Exception as e:
            self._log_error(f"Error in health monitor: {e}")

    @abstractmethod
    async def _handle_messages(self) -> None:
        """
        Handle incoming messages from the data source.

        This method should be implemented by subclasses to handle their specific message format.
        """
        pass

    async def _handle_connection_error(self) -> bool:
        """
        Handle connection errors and attempt reconnection.

        Returns:
            True if reconnection successful, False if max attempts exceeded
        """
        return await self._connection_manager.handle_connection_error(
            connect_logic=self._actual_connect,
            subscribe_logic=self.subscribe_to_data,
            symbols=list(self.symbols),
            data_types=self.data_types
        )
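
    # Illustrative sketch (not part of the required interface): a subclass's
    # _handle_messages() implementation might fall back to this helper when its
    # transport drops, e.g.:
    #
    #     try:
    #         raw = await self._ws.recv()      # `_ws` is a hypothetical handle
    #     except SomeTransportError:           # hypothetical exception type
    #         if not await self._handle_connection_error():
    #             raise ConnectionError("reconnection attempts exhausted")
    #
    # Only _handle_connection_error() itself is provided by this base class;
    # the transport details above are placeholders.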

    @abstractmethod
    async def _actual_connect(self) -> bool:
        """
        Abstract method for subclasses to implement actual connection logic.
        """
        pass

    @abstractmethod
    async def _actual_disconnect(self) -> None:
        """
        Abstract method for subclasses to implement actual disconnection logic.
        """
        pass

    def add_data_callback(self, data_type: DataType, callback: Callable[[MarketDataPoint], None]) -> None:
        """
        Add a callback function for a specific data type.

        Args:
            data_type: Type of data to monitor
            callback: Function to call when data is received
        """
        self._callback_dispatcher.add_data_callback(data_type, callback)

    def remove_data_callback(self, data_type: DataType, callback: Callable[[MarketDataPoint], None]) -> None:
        """
        Remove a callback function for a specific data type.

        Args:
            data_type: Type of data to stop monitoring
            callback: Function to remove
        """
        self._callback_dispatcher.remove_data_callback(data_type, callback)

    async def _notify_callbacks(self, data_point: MarketDataPoint) -> None:
        """
        Notify all registered callbacks for a data point.

        Args:
            data_point: Market data to distribute
        """
        await self._callback_dispatcher.notify_callbacks(data_point)

        # Update statistics
        self._state_telemetry.increment_messages_processed()
        self._state_telemetry._stats['last_message_time'] = data_point.timestamp  # Direct update for now, will refactor
        self._state_telemetry.update_data_received_timestamp()
        self._state_telemetry.update_heartbeat()
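
    # Note: within this base class, the heartbeat and data-reception timestamps
    # are refreshed only by _notify_callbacks(). A subclass that consumes data
    # without routing it through _notify_callbacks() should refresh the
    # telemetry itself (e.g. self._state_telemetry.update_heartbeat()),
    # otherwise _health_monitor() may treat the collector as stalled and
    # schedule a restart.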

    def get_status(self) -> Dict[str, Any]:
        """
        Get current collector status and statistics.

        Returns:
            Dictionary containing status information
        """
        status = self._state_telemetry.get_status()

        # Add BaseDataCollector-specific information
        status.update({
            'symbols': list(self.symbols),
            'data_types': [dt.value for dt in self.data_types],
            'timeframes': self.timeframes,
            'auto_restart': self.auto_restart
        })

        return status

    def get_health_status(self) -> Dict[str, Any]:
        """
        Get detailed health status for monitoring.

        Returns:
            Dictionary containing health information
        """
        return self._state_telemetry.get_health_status()

    def add_symbol(self, symbol: str) -> None:
        """
        Add a new symbol to collect data for.

        Args:
            symbol: Trading symbol to add
        """
        if symbol not in self.symbols:
            self.symbols.add(symbol)
            self._log_info(f"Added symbol: {symbol}")

            # If the collector is running, the new symbol still needs a subscription.
            if self._state_telemetry.status == CollectorStatus.RUNNING:
                # Note: subscribing requires an async context; callers are expected
                # to trigger it themselves (see the sketch after this method).
                pass
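
    # Illustrative sketch of the caller-side follow-up mentioned in add_symbol()
    # and remove_symbol(): from code already running inside the event loop, the
    # subscription change could be applied with something like
    #
    #     collector.add_symbol("ETH-USDT")
    #     asyncio.create_task(
    #         collector.subscribe_to_data(["ETH-USDT"], collector.data_types)
    #     )
    #
    # and symmetrically with unsubscribe_from_data() after remove_symbol().
    # The symbol string is only an example; nothing here runs automatically.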

    def remove_symbol(self, symbol: str) -> None:
        """
        Remove a symbol from data collection.

        Args:
            symbol: Trading symbol to remove
        """
        if symbol in self.symbols:
            self.symbols.remove(symbol)
            self._log_info(f"Removed symbol: {symbol}")

            # If the collector is running, the symbol also needs to be unsubscribed.
            if self._state_telemetry.status == CollectorStatus.RUNNING:
                # Note: unsubscribing requires an async context; callers are expected
                # to trigger it themselves (see the sketch above).
                pass

    def validate_ohlcv_data(self, data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
        """
        Validate and convert raw OHLCV data to a standardized format.

        Args:
            data: Raw OHLCV data dictionary
            symbol: Trading symbol
            timeframe: Timeframe (e.g., '1m', '5m', '1h')

        Returns:
            Validated OHLCVData object

        Raises:
            DataValidationError: If data validation fails
        """
        # Delegates to the module-level validate_ohlcv_data imported above; the
        # method name does not shadow it here because class scope is skipped
        # during name lookup inside function bodies.
        return validate_ohlcv_data(data, symbol, timeframe)
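
    # Illustrative sketch only: the exact fields accepted by validate_ohlcv_data()
    # are defined in .common.ohlcv_data, not here, and the keys below are
    # assumptions for illustration:
    #
    #     candle = {"timestamp": 1717027200000, "open": "67000.1", "high": "67100.0",
    #               "low": "66950.5", "close": "67050.2", "volume": "12.34"}
    #     ohlcv = collector.validate_ohlcv_data(candle, "BTC-USDT", "1m")
    #
    # DataValidationError is raised when the payload fails validation.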

    def __repr__(self) -> str:
        """String representation of the collector."""
        return f"<{self.__class__.__name__}({self.exchange_name}, {len(self.symbols)} symbols, {self.status.value})>"
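

# ---------------------------------------------------------------------------
# Illustrative sketch (editorial example, not part of the package API): a
# minimal concrete collector that satisfies the abstract interface using only
# an in-memory queue. It assumes nothing about a real exchange; the only names
# used are those defined or imported above, and it assumes ConnectionManager
# and CollectorStateAndTelemetry behave as their names suggest. Real collectors
# would translate exchange payloads into MarketDataPoint objects inside
# _process_message().
# ---------------------------------------------------------------------------
class _ExampleReplayCollector(BaseDataCollector):
    """Minimal demonstration subclass that replays canned in-memory messages."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._queue: asyncio.Queue = asyncio.Queue()

    async def _actual_connect(self) -> bool:
        # Nothing to open for an in-memory source.
        return True

    async def _actual_disconnect(self) -> None:
        # Nothing to close for an in-memory source.
        return None

    async def connect(self) -> bool:
        return await self._actual_connect()

    async def disconnect(self) -> None:
        await self._actual_disconnect()

    async def subscribe_to_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
        # A real implementation would send subscription requests to the exchange.
        for symbol in symbols:
            await self._queue.put({"symbol": symbol, "payload": "example"})
        return True

    async def unsubscribe_from_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
        return True

    async def _process_message(self, message: Any) -> Optional[MarketDataPoint]:
        # The MarketDataPoint constructor is defined elsewhere; to avoid guessing
        # its signature, this sketch simply drops the message.
        return None

    async def _handle_messages(self) -> None:
        message = await self._queue.get()
        data_point = await self._process_message(message)
        if data_point is not None:
            await self._notify_callbacks(data_point)


if __name__ == "__main__":
    # Rough usage sketch: register a callback, start, run briefly, then stop.
    # Because _process_message() above returns None, the callback never fires;
    # it is registered only to show the API.
    async def _demo() -> None:
        collector = _ExampleReplayCollector(
            exchange_name="example",
            symbols=["BTC-USDT"],
            data_types=[DataType.CANDLE],
            timeframes=["1m"],
            auto_restart=False,
        )
        collector.add_data_callback(DataType.CANDLE, lambda dp: print("received:", dp))
        if await collector.start():
            await asyncio.sleep(1)
            await collector.stop()

    asyncio.run(_demo())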