2025-05-30 20:33:56 +08:00
|
|
|
"""
|
|
|
|
|
Abstract base class for data collectors.
|
|
|
|
|
|
|
|
|
|
This module provides a common interface for all data collection implementations,
|
|
|
|
|
ensuring consistency across different exchange connectors and data sources.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
from datetime import datetime, timezone, timedelta
|
|
|
|
|
from decimal import Decimal
|
|
|
|
|
from typing import Dict, List, Optional, Any, Callable, Set
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
from enum import Enum
|
|
|
|
|
|
|
|
|
|
from utils.logger import get_logger
|
# NOTE: change summary preserved from commit history:
# Implement data collection architecture with modular components
# - Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
# - Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
# - Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
# - Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
# - Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
# - Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.
# These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00
|
|
|
from .collector_state_telemetry import CollectorStatus, CollectorStateAndTelemetry
|
|
|
|
|
from .collector_connection_manager import ConnectionManager
|
|
|
|
|
from .collector_callback_dispatcher import CallbackDispatcher
|
|
|
|
|
from ..common.data_types import DataType, MarketDataPoint
|
|
|
|
|
from ..common.ohlcv_data import OHLCVData, DataValidationError, validate_ohlcv_data
|
2025-05-30 20:33:56 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class DataCollectorError(Exception):
    """Root of the data-collector exception hierarchy.

    All collector-specific errors raised by this package derive from this
    class, so callers can catch it to handle any collection failure.
    """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ConnectionError(DataCollectorError):
    """Raised when the connection to a data source fails.

    NOTE(review): this name shadows Python's builtin ``ConnectionError``
    inside this module, so ``except ConnectionError`` here will never catch
    the OS-level builtin. Consider renaming (e.g. ``CollectorConnectionError``)
    in a future, interface-breaking change.
    """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BaseDataCollector(ABC):
    """
    Abstract base class for all data collectors.

    This class defines the interface that all data collection implementations
    must follow, providing consistency across different exchanges and data sources.

    Lifecycle: ``start()`` connects, subscribes, and launches the message loop
    plus an optional health monitor; ``stop()`` tears everything down;
    ``restart()`` chains the two. Subclasses implement the ``_actual_*`` and
    subscription hooks plus ``_handle_messages``/``_process_message``.
    """

    def __init__(self,
                 exchange_name: str,
                 symbols: List[str],
                 data_types: Optional[List[DataType]] = None,
                 timeframes: Optional[List[str]] = None,
                 component_name: Optional[str] = None,
                 auto_restart: bool = True,
                 health_check_interval: float = 30.0,
                 logger = None,
                 log_errors_only: bool = False):
        """
        Initialize the base data collector.

        Args:
            exchange_name: Name of the exchange (e.g., 'okx', 'binance')
            symbols: List of trading symbols to collect data for
            data_types: Types of data to collect (default: [DataType.CANDLE])
            timeframes: List of timeframes to collect (e.g., ['1s', '1m', '5m'])
            component_name: Name for logging (default: based on exchange_name)
            auto_restart: Enable automatic restart on failures (default: True)
            health_check_interval: Seconds between health checks (default: 30.0)
            logger: Logger instance. If None, a default logger is created.
            log_errors_only: If True and logger is provided, only log error-level messages
        """
        self.exchange_name = exchange_name.lower()
        self.symbols = set(symbols)
        self.data_types = data_types or [DataType.CANDLE]
        self.timeframes = timeframes or ['1m', '5m']  # Default timeframes if none provided
        self.auto_restart = auto_restart

        # Ensure a logger is always available.
        self.logger = logger if logger is not None else get_logger(self.exchange_name)

        # State and telemetry manager owns status, heartbeat, and statistics.
        component = component_name or f"{self.exchange_name}_collector"
        self._state_telemetry = CollectorStateAndTelemetry(
            exchange_name=self.exchange_name,
            component_name=component,
            health_check_interval=health_check_interval,
            logger=self.logger,  # Pass the actual logger instance
            log_errors_only=log_errors_only
        )
        self.component_name = component  # Keep for external access

        # Connection manager encapsulates connect/disconnect/reconnect logic.
        self._connection_manager = ConnectionManager(
            exchange_name=self.exchange_name,
            component_name=component,
            max_reconnect_attempts=5,  # Default, can be made configurable later
            reconnect_delay=5.0,  # Default, can be made configurable later
            logger=self.logger,
            state_telemetry=self._state_telemetry
        )

        # Callback dispatcher fans received data points out to registered callbacks.
        self._callback_dispatcher = CallbackDispatcher(
            component_name=component,
            logger=self.logger
        )

        # Background tasks (message loop, health monitor). References are kept
        # here so the tasks are not garbage-collected while running; stop()
        # cancels everything in this set.
        self._tasks: Set[asyncio.Task] = set()
        # Restart tasks are tracked SEPARATELY: restart() calls stop(), which
        # cancels self._tasks — keeping the restart task out of that set
        # prevents it from cancelling itself mid-restart.
        self._restart_tasks: Set[asyncio.Task] = set()

        # Log initialization if logger is available and not error-only.
        if self._state_telemetry.logger and not self._state_telemetry.log_errors_only:
            self._state_telemetry._log_info(f"{self.component_name}: Initialized {self.exchange_name} data collector for symbols: {', '.join(symbols)}")
            self._state_telemetry._log_info(f"{self.component_name}: Using timeframes: {', '.join(self.timeframes)}")

    @property
    def status(self) -> CollectorStatus:
        """Current collector status, delegated to the state/telemetry manager."""
        return self._state_telemetry.status

    # --- Logging delegates (thin wrappers over the telemetry manager) -------

    def _log_debug(self, message: str) -> None:
        """Log a debug-level message via the state/telemetry manager."""
        self._state_telemetry._log_debug(message)

    def _log_info(self, message: str) -> None:
        """Log an info-level message via the state/telemetry manager."""
        self._state_telemetry._log_info(message)

    def _log_warning(self, message: str) -> None:
        """Log a warning-level message via the state/telemetry manager."""
        self._state_telemetry._log_warning(message)

    def _log_error(self, message: str, exc_info: bool = False) -> None:
        """Log an error-level message via the state/telemetry manager."""
        self._state_telemetry._log_error(message, exc_info=exc_info)

    def _log_critical(self, message: str, exc_info: bool = False) -> None:
        """Log a critical-level message via the state/telemetry manager."""
        self._state_telemetry._log_critical(message, exc_info=exc_info)

    # --- Abstract interface -------------------------------------------------

    @abstractmethod
    async def connect(self) -> bool:
        """
        Establish connection to the data source.

        Returns:
            True if connection successful, False otherwise
        """
        pass

    @abstractmethod
    async def disconnect(self) -> None:
        """Disconnect from the data source."""
        pass

    @abstractmethod
    async def subscribe_to_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
        """
        Subscribe to data streams for specified symbols and data types.

        Args:
            symbols: Trading symbols to subscribe to
            data_types: Types of data to subscribe to

        Returns:
            True if subscription successful, False otherwise
        """
        pass

    @abstractmethod
    async def unsubscribe_from_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
        """
        Unsubscribe from data streams.

        Args:
            symbols: Trading symbols to unsubscribe from
            data_types: Types of data to unsubscribe from

        Returns:
            True if unsubscription successful, False otherwise
        """
        pass

    @abstractmethod
    async def _process_message(self, message: Any) -> Optional[MarketDataPoint]:
        """
        Process incoming message from the data source.

        Args:
            message: Raw message from the data source

        Returns:
            Processed MarketDataPoint or None if message should be ignored
        """
        pass

    # --- Task helpers -------------------------------------------------------

    def _spawn_task(self, coro) -> asyncio.Task:
        """Create a background task, keep a reference, and auto-discard on completion.

        asyncio only holds a weak reference to running tasks, so keeping our
        own reference prevents premature garbage collection.
        """
        task = asyncio.create_task(coro)
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)
        return task

    def _schedule_restart(self) -> None:
        """Schedule an auto-restart without dropping the task reference.

        Tracked in ``_restart_tasks`` (not ``_tasks``) so the restart survives
        the task cancellation done by ``stop()``.
        """
        task = asyncio.create_task(self.restart())
        self._restart_tasks.add(task)
        task.add_done_callback(self._restart_tasks.discard)

    # --- Lifecycle ----------------------------------------------------------

    async def start(self) -> bool:
        """
        Start the data collector.

        Connects, subscribes, and launches the message-processing loop plus
        the health monitor (if a positive health-check interval is configured).

        Returns:
            True if started successfully, False otherwise
        """
        # Idempotent: a collector that is already running/starting is a no-op.
        if self._state_telemetry.status in [CollectorStatus.RUNNING, CollectorStatus.STARTING]:
            self._log_warning("Data collector is already running or starting")
            return True

        self._log_info(f"Starting {self.exchange_name} data collector")
        self._state_telemetry.update_status(CollectorStatus.STARTING)
        self._state_telemetry.set_should_be_running(True)

        try:
            # Connect to data source.
            if not await self._connection_manager.connect(self._actual_connect):
                self._log_error("Failed to connect to data source")
                self._state_telemetry.update_status(CollectorStatus.ERROR)
                return False

            # Subscribe to data streams; roll back the connection on failure.
            if not await self.subscribe_to_data(list(self.symbols), self.data_types):
                self._log_error("Failed to subscribe to data streams")
                self._state_telemetry.update_status(CollectorStatus.ERROR)
                await self._connection_manager.disconnect(self._actual_disconnect)
                return False

            # Mark running before launching background tasks so their loops spin.
            self._state_telemetry.set_running_state(True)
            self._state_telemetry.update_status(CollectorStatus.RUNNING)

            # Start message processing task.
            self._spawn_task(self._message_loop())

            # Start health monitoring task (disabled when interval <= 0).
            if self._state_telemetry.health_check_interval > 0:
                self._spawn_task(self._health_monitor())

            self._log_info(f"{self.exchange_name} data collector started successfully")
            return True

        except Exception as e:
            self._log_error(f"Failed to start data collector: {e}")
            self._state_telemetry.update_status(CollectorStatus.ERROR)
            self._state_telemetry.set_should_be_running(False)
            return False

    async def stop(self, force: bool = False) -> None:
        """
        Stop the data collector and cleanup resources.

        Args:
            force: If True, cancel background tasks without awaiting their
                completion (non-graceful shutdown).
        """
        if self._state_telemetry.status == CollectorStatus.STOPPED:
            self._log_warning("Data collector is already stopped")
            return

        self._log_info(f"Stopping {self.exchange_name} data collector")
        self._state_telemetry.update_status(CollectorStatus.STOPPING)
        self._state_telemetry.set_should_be_running(False)

        try:
            # Signal background loops to exit.
            self._state_telemetry.set_running_state(False)

            # Cancel all background tasks; when not forcing, await each so
            # cancellation completes before we tear down the connection.
            for task in list(self._tasks):
                if not task.done():
                    task.cancel()
                    if not force:
                        try:
                            await task
                        except asyncio.CancelledError:
                            pass

            self._tasks.clear()

            # Unsubscribe and disconnect.
            await self.unsubscribe_from_data(list(self.symbols), self.data_types)
            await self._connection_manager.disconnect(self._actual_disconnect)

            self._state_telemetry.update_status(CollectorStatus.STOPPED)
            self._log_info(f"{self.exchange_name} data collector stopped")

        except Exception as e:
            self._log_error(f"Error stopping data collector: {e}")
            self._state_telemetry.update_status(CollectorStatus.ERROR)

    async def restart(self) -> bool:
        """
        Restart the data collector (stop, pause, start).

        Returns:
            True if restarted successfully, False otherwise
        """
        self._log_info(f"Restarting {self.exchange_name} data collector")
        self._state_telemetry.increment_restarts()

        # Stop first.
        await self.stop()

        # Wait a bit before restarting.
        # NOTE(review): reads the connection manager's private delay attribute;
        # a public accessor would be cleaner.
        await asyncio.sleep(self._connection_manager._reconnect_delay)

        # Start again.
        return await self.start()

    # --- Background loops ---------------------------------------------------

    async def _message_loop(self) -> None:
        """Main message processing loop; runs until the running flag clears."""
        try:
            self._log_debug("Starting message processing loop")

            while self._state_telemetry._running:
                try:
                    await self._handle_messages()
                except asyncio.CancelledError:
                    break
                except Exception as e:
                    self._state_telemetry.increment_errors(str(e))
                    self._log_error(f"Error processing messages: {e}")
                    # Small delay to prevent tight error loops.
                    await asyncio.sleep(0.1)

        except asyncio.CancelledError:
            self._log_debug("Message loop cancelled")
            raise
        except Exception as e:
            self._log_error(f"Error in message loop: {e}")
            self._state_telemetry.update_status(CollectorStatus.ERROR)

    async def _health_monitor(self) -> None:
        """Monitor collector health and restart if needed."""
        try:
            self._log_debug("Starting health monitor")

            while self._state_telemetry._running:
                try:
                    await asyncio.sleep(self._state_telemetry.health_check_interval)

                    # Check if collector should be running but isn't.
                    if (self._state_telemetry._should_be_running
                            and self._state_telemetry.status != CollectorStatus.RUNNING):
                        self._log_warning("Collector should be running but isn't - restarting")
                        if self.auto_restart:
                            self._schedule_restart()
                        continue

                    # Check heartbeat: stale after two missed check intervals.
                    time_since_heartbeat = datetime.now(timezone.utc) - self._state_telemetry._last_heartbeat
                    if time_since_heartbeat > timedelta(seconds=self._state_telemetry.health_check_interval * 2):
                        self._log_warning(f"No heartbeat for {time_since_heartbeat.total_seconds():.1f}s - restarting")
                        if self.auto_restart:
                            self._schedule_restart()
                        continue

                    # Check data reception silence.
                    if self._state_telemetry._last_data_received:
                        time_since_data = datetime.now(timezone.utc) - self._state_telemetry._last_data_received
                        if time_since_data > self._state_telemetry._max_silence_duration:
                            self._log_warning(f"No data received for {time_since_data.total_seconds():.1f}s - restarting")
                            if self.auto_restart:
                                self._schedule_restart()
                            continue

                    # Check for error status.
                    if self._state_telemetry.status == CollectorStatus.ERROR:
                        self._log_warning(f"Collector in {self._state_telemetry.status.value} status - restarting")
                        if self.auto_restart:
                            self._schedule_restart()

                except asyncio.CancelledError:
                    break

        except asyncio.CancelledError:
            self._log_debug("Health monitor cancelled")
            raise
        except Exception as e:
            self._log_error(f"Error in health monitor: {e}")

    @abstractmethod
    async def _handle_messages(self) -> None:
        """
        Handle incoming messages from the data source.

        This method should be implemented by subclasses to handle their specific message format.
        """
        pass

    async def _handle_connection_error(self) -> bool:
        """
        Handle connection errors and attempt reconnection.

        Returns:
            True if reconnection successful, False if max attempts exceeded
        """
        return await self._connection_manager.handle_connection_error(
            connect_logic=self._actual_connect,
            subscribe_logic=self.subscribe_to_data,
            symbols=list(self.symbols),
            data_types=self.data_types
        )

    @abstractmethod
    async def _actual_connect(self) -> bool:
        """
        Abstract method for subclasses to implement actual connection logic.
        """
        pass

    @abstractmethod
    async def _actual_disconnect(self) -> None:
        """
        Abstract method for subclasses to implement actual disconnection logic.
        """
        pass

    # --- Callbacks ----------------------------------------------------------

    def add_data_callback(self, data_type: DataType, callback: Callable[[MarketDataPoint], None]) -> None:
        """
        Add a callback function for specific data type.

        Args:
            data_type: Type of data to monitor
            callback: Function to call when data is received
        """
        self._callback_dispatcher.add_data_callback(data_type, callback)

    def remove_data_callback(self, data_type: DataType, callback: Callable[[MarketDataPoint], None]) -> None:
        """
        Remove a callback function for specific data type.

        Args:
            data_type: Type of data to stop monitoring
            callback: Function to remove
        """
        self._callback_dispatcher.remove_data_callback(data_type, callback)

    async def _notify_callbacks(self, data_point: MarketDataPoint) -> None:
        """
        Notify all registered callbacks for a data point and update telemetry.

        Args:
            data_point: Market data to distribute
        """
        await self._callback_dispatcher.notify_callbacks(data_point)

        # Update statistics.
        self._state_telemetry.increment_messages_processed()
        # Direct _stats update for now; should move behind a telemetry method.
        self._state_telemetry._stats['last_message_time'] = data_point.timestamp
        self._state_telemetry.update_data_received_timestamp()
        self._state_telemetry.update_heartbeat()

    # --- Status & symbol management -----------------------------------------

    def get_status(self) -> Dict[str, Any]:
        """
        Get current collector status and statistics.

        Returns:
            Dictionary containing status information
        """
        status = self._state_telemetry.get_status()

        # Add BaseDataCollector specific information.
        status.update({
            'symbols': list(self.symbols),
            'data_types': [dt.value for dt in self.data_types],
            'timeframes': self.timeframes,
            'auto_restart': self.auto_restart
        })

        return status

    def get_health_status(self) -> Dict[str, Any]:
        """
        Get detailed health status for monitoring.

        Returns:
            Dictionary containing health information
        """
        return self._state_telemetry.get_health_status()

    def add_symbol(self, symbol: str) -> None:
        """
        Add a new symbol to collect data for.

        Note: does NOT subscribe a running collector to the new symbol;
        callers must invoke ``subscribe_to_data`` from an async context.

        Args:
            symbol: Trading symbol to add
        """
        if symbol not in self.symbols:
            self.symbols.add(symbol)
            self._log_info(f"Added symbol: {symbol}")

            # If collector is running, subscription must be handled by the
            # caller from an async context.
            if self._state_telemetry.status == CollectorStatus.RUNNING:
                pass

    def remove_symbol(self, symbol: str) -> None:
        """
        Remove a symbol from data collection.

        Note: does NOT unsubscribe a running collector from the symbol;
        callers must invoke ``unsubscribe_from_data`` from an async context.

        Args:
            symbol: Trading symbol to remove
        """
        if symbol in self.symbols:
            self.symbols.remove(symbol)
            self._log_info(f"Removed symbol: {symbol}")

            # If collector is running, unsubscription must be handled by the
            # caller from an async context.
            if self._state_telemetry.status == CollectorStatus.RUNNING:
                pass

    def validate_ohlcv_data(self, data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
        """
        Validate and convert raw OHLCV data to standardized format.

        Args:
            data: Raw OHLCV data dictionary
            symbol: Trading symbol
            timeframe: Timeframe (e.g., '1m', '5m', '1h')

        Returns:
            Validated OHLCVData object

        Raises:
            DataValidationError: If data validation fails
        """
        return validate_ohlcv_data(data, symbol, timeframe)

    def __repr__(self) -> str:
        """String representation of the collector."""
        return f"<{self.__class__.__name__}({self.exchange_name}, {len(self.symbols)} symbols, {self.status.value})>"
|