Implement enhanced data collection system with health monitoring and management

- Introduced `BaseDataCollector` and `CollectorManager` classes for standardized data collection and centralized management.
- Added health monitoring features, including auto-restart capabilities and detailed status reporting for collectors.
- Updated `env.template` to include new logging and health check configurations.
- Enhanced documentation in `docs/data_collectors.md` to provide comprehensive guidance on the new data collection system.
- Added unit tests for `BaseDataCollector` and `CollectorManager` to ensure reliability and functionality.
This commit is contained in:
Vasily.onl 2025-05-30 20:33:56 +08:00
parent b7263b023f
commit 4936e5cd73
13 changed files with 4036 additions and 1 deletions

25
data/__init__.py Normal file
View File

@ -0,0 +1,25 @@
"""
Data collection and processing package for the Crypto Trading Bot Platform.
This package contains modules for collecting market data from various exchanges,
processing and validating the data, and storing it in the database.
"""
from .base_collector import (
BaseDataCollector, DataCollectorError, DataValidationError,
DataType, CollectorStatus, MarketDataPoint, OHLCVData
)
from .collector_manager import CollectorManager, ManagerStatus, CollectorConfig
# Public API of the package: every name imported above from
# base_collector and collector_manager is re-exported here.
__all__ = [
'BaseDataCollector',
'DataCollectorError',
'DataValidationError',
'DataType',
'CollectorStatus',
'MarketDataPoint',
'OHLCVData',
'CollectorManager',
'ManagerStatus',
'CollectorConfig'
]

667
data/base_collector.py Normal file
View File

@ -0,0 +1,667 @@
"""
Abstract base class for data collectors.
This module provides a common interface for all data collection implementations,
ensuring consistency across different exchange connectors and data sources.
"""
import asyncio
from abc import ABC, abstractmethod
from datetime import datetime, timezone, timedelta
from decimal import Decimal
from typing import Dict, List, Optional, Any, Callable, Set
from dataclasses import dataclass
from enum import Enum
from utils.logger import get_logger
class DataType(Enum):
    """Kinds of data a collector can be asked to gather.

    Each member's value is the lowercase string form of its name.
    """

    TICKER = "ticker"
    TRADE = "trade"
    ORDERBOOK = "orderbook"
    CANDLE = "candle"
    BALANCE = "balance"
class CollectorStatus(Enum):
    """Lifecycle and health states a data collector can be in."""

    STOPPED = "stopped"
    STARTING = "starting"
    RUNNING = "running"
    STOPPING = "stopping"
    ERROR = "error"
    RECONNECTING = "reconnecting"
    # Set by the health monitor when heartbeat or data has gone stale.
    UNHEALTHY = "unhealthy"
@dataclass
class MarketDataPoint:
    """One normalized piece of market data handed to data callbacks.

    A timestamp without tzinfo is stamped as UTC on construction.
    """

    exchange: str
    symbol: str
    timestamp: datetime
    data_type: DataType
    data: Dict[str, Any]

    def __post_init__(self):
        """Attach UTC to the timestamp when it carries no tzinfo."""
        if self.timestamp.tzinfo:
            return
        self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)
@dataclass
class OHLCVData:
    """OHLCV (Open, High, Low, Close, Volume) candle.

    On construction the timestamp is stamped as UTC when it has no tzinfo,
    all price/volume fields are converted to ``Decimal`` and the values are
    validated.

    Raises:
        DataValidationError: If a price or the volume is not numeric, is
            NaN/Infinity, or the prices violate
            ``low <= open <= high`` / ``low <= close <= high``.
    """

    symbol: str
    timeframe: str
    timestamp: datetime
    open: Decimal
    high: Decimal
    low: Decimal
    close: Decimal
    volume: Decimal
    trades_count: Optional[int] = None

    def __post_init__(self):
        """Normalize and validate the candle after dataclass initialization."""
        if not self.timestamp.tzinfo:
            self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)

        # Type checks run before conversion so arbitrary objects are rejected
        # with a validation error instead of being stringified.
        prices = (self.open, self.high, self.low, self.close)
        if not all(isinstance(price, (Decimal, float, int)) for price in prices):
            raise DataValidationError("All OHLCV prices must be numeric")
        if not isinstance(self.volume, (Decimal, float, int)):
            raise DataValidationError("Volume must be numeric")

        # Go through str() so floats keep their printed value instead of
        # their full binary expansion.
        self.open = Decimal(str(self.open))
        self.high = Decimal(str(self.high))
        self.low = Decimal(str(self.low))
        self.close = Decimal(str(self.close))
        self.volume = Decimal(str(self.volume))

        # Ordering comparisons on a Decimal NaN raise decimal.InvalidOperation,
        # so non-finite values must be rejected before the range check below.
        for field_name in ("open", "high", "low", "close", "volume"):
            if not getattr(self, field_name).is_finite():
                raise DataValidationError(
                    f"Invalid OHLCV data: non-finite {field_name} for {self.symbol}"
                )

        if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
            raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")
class DataCollectorError(Exception):
    """Root of the data collector exception hierarchy."""


class DataValidationError(DataCollectorError):
    """Raised when collected data fails validation."""


# NOTE(review): this name shadows the builtin ConnectionError inside this module.
class ConnectionError(DataCollectorError):
    """Raised when the connection to a data source fails."""
class BaseDataCollector(ABC):
    """
    Common base for every data collector.

    Subclasses supply the exchange-specific pieces (connect, subscribe,
    message handling); this class provides the shared lifecycle, health
    monitoring, callback dispatch and status reporting.
    """

    def __init__(self,
                 exchange_name: str,
                 symbols: List[str],
                 data_types: Optional[List[DataType]] = None,
                 component_name: Optional[str] = None,
                 auto_restart: bool = True,
                 health_check_interval: float = 30.0):
        """
        Set up collector state; nothing is connected or started here.

        Args:
            exchange_name: Exchange identifier (stored lowercased)
            symbols: Trading symbols to collect (stored as a set)
            data_types: Data types to collect (default: [DataType.CANDLE])
            component_name: Logger name (default: "<exchange>_collector")
            auto_restart: Whether start() also launches the health monitor
            health_check_interval: Seconds between health checks
        """
        # Configuration
        self.exchange_name = exchange_name.lower()
        self.symbols = set(symbols)
        self.data_types = data_types if data_types else [DataType.CANDLE]
        self.auto_restart = auto_restart
        self.health_check_interval = health_check_interval

        # Logging
        if not component_name:
            component_name = f"{self.exchange_name}_collector"
        self.logger = get_logger(component_name, verbose=True)

        # Lifecycle state; _should_be_running records the desired state.
        self.status = CollectorStatus.STOPPED
        self._running = False
        self._should_be_running = False
        self._tasks: Set[asyncio.Task] = set()

        # One callback list per data type.
        self._data_callbacks: Dict[DataType, List[Callable]] = {
            kind: [] for kind in DataType
        }

        # Connection / reconnection bookkeeping (delay is in seconds).
        self._connection = None
        self._reconnect_attempts = 0
        self._max_reconnect_attempts = 5
        self._reconnect_delay = 5.0

        # Health monitoring; silence longer than _max_silence_duration
        # after data has been seen counts as unhealthy.
        self._last_heartbeat = datetime.now(timezone.utc)
        self._last_data_received = None
        self._health_check_task = None
        self._max_silence_duration = timedelta(minutes=5)

        # Counters and timestamps reported by get_status().
        self._stats = dict(
            messages_received=0,
            messages_processed=0,
            errors=0,
            restarts=0,
            last_message_time=None,
            connection_uptime=None,
            last_error=None,
            last_restart_time=None,
        )

        self.logger.info(f"Initialized {self.exchange_name} data collector for symbols: {', '.join(symbols)}")
@abstractmethod
async def connect(self) -> bool:
    """
    Open the connection to the data source.

    Returns:
        True when the connection was established, False otherwise
    """
    ...
@abstractmethod
async def disconnect(self) -> None:
    """Close the connection to the data source."""
    ...
@abstractmethod
async def subscribe_to_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
    """
    Subscribe to the data streams for the given symbols and data types.

    Args:
        symbols: Trading symbols to subscribe to
        data_types: Data types to subscribe to

    Returns:
        True when the subscription succeeded, False otherwise
    """
    ...
@abstractmethod
async def unsubscribe_from_data(self, symbols: List[str], data_types: List[DataType]) -> bool:
    """
    Drop the subscriptions for the given symbols and data types.

    Args:
        symbols: Trading symbols to unsubscribe from
        data_types: Data types to unsubscribe from

    Returns:
        True when the unsubscription succeeded, False otherwise
    """
    ...
@abstractmethod
async def _process_message(self, message: Any) -> Optional[MarketDataPoint]:
    """
    Turn one raw message from the data source into a MarketDataPoint.

    Args:
        message: Raw message as received from the data source

    Returns:
        The processed MarketDataPoint, or None when the message is to be ignored
    """
    ...
async def start(self) -> bool:
    """
    Start the data collector.

    Connects, subscribes to all configured symbols/data types, then launches
    the message loop and (when auto_restart is enabled) the health monitor as
    background tasks tracked in self._tasks.

    Returns:
        True if started successfully (or already running/starting),
        False otherwise. On failure the status is set to ERROR.
    """
    if self.status in (CollectorStatus.RUNNING, CollectorStatus.STARTING):
        self.logger.warning("Data collector is already running or starting")
        return True

    self.logger.info(f"Starting {self.exchange_name} data collector")
    self.status = CollectorStatus.STARTING
    self._should_be_running = True

    try:
        # Connect to data source
        if not await self.connect():
            self.status = CollectorStatus.ERROR
            self.logger.error("Failed to connect to data source")
            return False

        # Subscribe to data streams
        if not await self.subscribe_to_data(list(self.symbols), self.data_types):
            self.status = CollectorStatus.ERROR
            self.logger.error("Failed to subscribe to data streams")
            await self.disconnect()
            return False

        # Mark as running before the background tasks are created, because
        # both loops are guarded by self._running.
        self._running = True
        self.status = CollectorStatus.RUNNING
        self._stats['connection_uptime'] = datetime.now(timezone.utc)
        self._last_heartbeat = datetime.now(timezone.utc)

        # Background task for message processing
        message_task = asyncio.create_task(self._message_loop())
        self._tasks.add(message_task)
        message_task.add_done_callback(self._tasks.discard)

        # Health monitoring: reuse a monitor that is still alive instead of
        # stacking a second one (e.g. when start() is called again from an
        # ERROR state, or from restart() running inside the monitor itself).
        if self.auto_restart:
            monitor = self._health_check_task
            if monitor is None or monitor.done():
                monitor = asyncio.create_task(self._health_monitor())
                self._health_check_task = monitor
                self._tasks.add(monitor)
                monitor.add_done_callback(self._tasks.discard)

        self.logger.info(f"{self.exchange_name} data collector started successfully")
        return True

    except Exception as e:
        self.status = CollectorStatus.ERROR
        self._stats['last_error'] = str(e)
        self.logger.error(f"Failed to start data collector: {e}")
        # Best-effort cleanup: a failing disconnect must not turn the
        # documented "return False" into an exception.
        try:
            await self.disconnect()
        except Exception as cleanup_error:
            self.logger.error(f"Error disconnecting after failed start: {cleanup_error}")
        return False
async def stop(self, force: bool = False) -> None:
    """
    Stop the data collector.

    Cancels the collector's background tasks, unsubscribes and disconnects.
    Errors are logged and leave the collector in ERROR status; they are not
    raised to the caller.

    Args:
        force: If True, also clear the desired-running flag so the collector
            is not considered "should be running" afterwards
    """
    if self.status == CollectorStatus.STOPPED:
        self.logger.warning("Data collector is already stopped")
        return

    self.logger.info(f"Stopping {self.exchange_name} data collector")
    self.status = CollectorStatus.STOPPING
    self._running = False
    if force:
        self._should_be_running = False

    try:
        # Never cancel or await the task that is executing stop() itself.
        # The health monitor lives in self._tasks and calls restart() ->
        # stop(); cancelling it here would raise CancelledError at the next
        # await and abandon the stop/restart half-way.
        current = asyncio.current_task()
        pending = [task for task in self._tasks if task is not current]

        for task in pending:
            task.cancel()

        # Wait for the cancelled tasks to finish unwinding
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)

        # Unsubscribe and disconnect; the connection is closed even when
        # unsubscribing fails.
        try:
            await self.unsubscribe_from_data(list(self.symbols), self.data_types)
        finally:
            await self.disconnect()

        self.status = CollectorStatus.STOPPED
        self.logger.info(f"{self.exchange_name} data collector stopped")

    except Exception as e:
        self.status = CollectorStatus.ERROR
        self._stats['last_error'] = str(e)
        self.logger.error(f"Error stopping data collector: {e}")
async def restart(self) -> bool:
    """
    Stop the collector, pause briefly, and start it again.

    Returns:
        True when the collector came back up, False otherwise
    """
    self.logger.info(f"Restarting {self.exchange_name} data collector")

    stats = self._stats
    stats['restarts'] += 1
    stats['last_restart_time'] = datetime.now(timezone.utc)

    # force=False keeps the desired-running flag set across the restart.
    await self.stop(force=False)

    # Short pause between stop and start.
    await asyncio.sleep(2.0)

    # A restart begins with a fresh reconnection budget.
    self._reconnect_attempts = 0

    started = await self.start()
    return started
async def _message_loop(self) -> None:
    """Run _handle_messages() repeatedly until stopped, cancelled or unrecoverable."""
    self.logger.debug("Starting message processing loop")

    while self._running:
        try:
            # Subclass-specific message handling.
            await self._handle_messages()
            # Each completed pass counts as a heartbeat.
            self._last_heartbeat = datetime.now(timezone.utc)
        except asyncio.CancelledError:
            self.logger.debug("Message loop cancelled")
            return
        except Exception as e:
            stats = self._stats
            stats['errors'] += 1
            stats['last_error'] = str(e)
            self.logger.error(f"Error in message loop: {e}")

            # Any handler error is treated as a lost connection.
            recovered = await self._handle_connection_error()
            if not recovered:
                return

            # Brief pause before the next pass.
            await asyncio.sleep(1)
async def _health_monitor(self) -> None:
    """Periodically check the collector's health and restart it when a check fails."""
    self.logger.debug("Starting health monitor")
    failed_states = (CollectorStatus.ERROR, CollectorStatus.UNHEALTHY)

    while self._running and self.auto_restart:
        try:
            await asyncio.sleep(self.health_check_interval)

            # 1) Desired state says running, actual state says otherwise.
            if self._should_be_running and not self._running:
                self.logger.warning("Collector should be running but isn't - restarting")
                await self.restart()
                continue

            # 2) Heartbeat older than two check intervals.
            time_since_heartbeat = datetime.now(timezone.utc) - self._last_heartbeat
            heartbeat_limit = timedelta(seconds=self.health_check_interval * 2)
            if time_since_heartbeat > heartbeat_limit:
                self.logger.warning(f"No heartbeat for {time_since_heartbeat.total_seconds():.1f}s - restarting")
                self.status = CollectorStatus.UNHEALTHY
                await self.restart()
                continue

            # 3) Data silence; only checked once data has been seen.
            last_data = self._last_data_received
            if last_data:
                time_since_data = datetime.now(timezone.utc) - last_data
                if time_since_data > self._max_silence_duration:
                    self.logger.warning(f"No data received for {time_since_data.total_seconds():.1f}s - restarting")
                    self.status = CollectorStatus.UNHEALTHY
                    await self.restart()
                    continue

            # 4) Status already marks a failure.
            if self.status in failed_states:
                self.logger.warning(f"Collector in {self.status.value} status - restarting")
                await self.restart()

        except asyncio.CancelledError:
            self.logger.debug("Health monitor cancelled")
            break
        except Exception as e:
            self.logger.error(f"Error in health monitor: {e}")
            await asyncio.sleep(self.health_check_interval)
@abstractmethod
async def _handle_messages(self) -> None:
    """
    Receive and handle incoming messages from the data source.

    Called repeatedly by the message loop; subclasses implement it for
    their specific message format.
    """
    ...
async def _handle_connection_error(self) -> bool:
    """
    Handle a lost connection by reconnecting and resubscribing.

    Attempts are repeated, each preceded by a self._reconnect_delay pause,
    until one succeeds or self._max_reconnect_attempts is used up. The
    attempt counter is reset to 0 on success.

    Returns:
        True if reconnection successful, False if max attempts exceeded
        (status is then set to ERROR)
    """
    while self._reconnect_attempts < self._max_reconnect_attempts:
        self._reconnect_attempts += 1
        self.status = CollectorStatus.RECONNECTING
        self.logger.warning(f"Connection lost. Attempting reconnection {self._reconnect_attempts}/{self._max_reconnect_attempts}")

        await asyncio.sleep(self._reconnect_delay)

        try:
            if await self.connect():
                if await self.subscribe_to_data(list(self.symbols), self.data_types):
                    self.status = CollectorStatus.RUNNING
                    self._reconnect_attempts = 0
                    self._stats['connection_uptime'] = datetime.now(timezone.utc)
                    self.logger.info("Reconnection successful")
                    return True
                # Connected but not subscribed: close the connection again,
                # as start() does, so the next attempt starts clean.
                await self.disconnect()
        except Exception as e:
            # A failed attempt consumes one try; keep going until the
            # budget is exhausted.
            self._stats['last_error'] = str(e)
            self.logger.error(f"Reconnection attempt failed: {e}")

    self.logger.error(f"Max reconnection attempts ({self._max_reconnect_attempts}) exceeded")
    self.status = CollectorStatus.ERROR
    return False
def add_data_callback(self, data_type: DataType, callback: Callable[[MarketDataPoint], None]) -> None:
    """
    Register a callback for one data type.

    Args:
        data_type: Data type the callback is registered for
        callback: Function invoked with each MarketDataPoint of that type
    """
    registered = self._data_callbacks[data_type]
    registered.append(callback)
    self.logger.debug(f"Added callback for {data_type.value} data")
def remove_data_callback(self, data_type: DataType, callback: Callable[[MarketDataPoint], None]) -> None:
    """
    Unregister a callback for one data type.

    Removing a callback that is not registered is a silent no-op.

    Args:
        data_type: Data type the callback was registered for
        callback: Callback function to remove
    """
    try:
        self._data_callbacks[data_type].remove(callback)
    except ValueError:
        # Not registered for this data type.
        return
    self.logger.debug(f"Removed callback for {data_type.value} data")
async def _notify_callbacks(self, data_point: MarketDataPoint) -> None:
    """
    Notify all callbacks registered for the data point's type.

    Also records the time of this call as the last time data was received.
    Callbacks may be plain functions or return an awaitable (async
    functions, async callable objects, wrappers around coroutines); any
    awaitable result is awaited. An exception in one callback is logged and
    does not prevent the remaining callbacks from running.

    Args:
        data_point: Market data to send to callbacks
    """
    # Function-scope import: the module header does not import inspect.
    import inspect

    # Update data received timestamp
    self._last_data_received = datetime.now(timezone.utc)
    self._stats['last_message_time'] = self._last_data_received

    # Iterate over a snapshot so a callback may add or remove callbacks
    # without disturbing this dispatch round.
    callbacks = list(self._data_callbacks.get(data_point.data_type, []))

    for callback in callbacks:
        try:
            outcome = callback(data_point)
            # Checking the result instead of the callable catches async
            # callables that a coroutine-function test on the callable
            # itself would miss.
            if inspect.isawaitable(outcome):
                await outcome
        except Exception as e:
            self.logger.error(f"Error in data callback: {e}")
def get_status(self) -> Dict[str, Any]:
    """
    Report the collector's current status, health timings and statistics.

    Returns:
        Dictionary containing status information
    """
    def seconds_since(moment):
        # None (or any falsy value) means "never happened".
        if not moment:
            return None
        return (datetime.now(timezone.utc) - moment).total_seconds()

    uptime_seconds = seconds_since(self._stats['connection_uptime'])
    time_since_heartbeat = seconds_since(self._last_heartbeat)
    time_since_data = seconds_since(self._last_data_received)

    health = {
        'time_since_heartbeat': time_since_heartbeat,
        'time_since_data': time_since_data,
        'max_silence_duration': self._max_silence_duration.total_seconds()
    }

    # Copy of the raw counters, extended with derived values.
    statistics = dict(self._stats)
    statistics['uptime_seconds'] = uptime_seconds
    statistics['reconnect_attempts'] = self._reconnect_attempts

    return {
        'exchange': self.exchange_name,
        'status': self.status.value,
        'should_be_running': self._should_be_running,
        'symbols': list(self.symbols),
        'data_types': [dt.value for dt in self.data_types],
        'auto_restart': self.auto_restart,
        'health': health,
        'statistics': statistics
    }
def get_health_status(self) -> Dict[str, Any]:
    """
    Evaluate the collector's health for monitoring.

    The collector is healthy when none of the checks below reports an issue.

    Returns:
        Dictionary containing health information
    """
    now = datetime.now(timezone.utc)
    health_issues = []

    # Desired vs. actual running state
    if self._should_be_running and not self._running:
        health_issues.append("Should be running but is stopped")

    # Heartbeat older than two check intervals
    heartbeat = self._last_heartbeat
    if heartbeat:
        heartbeat_age = now - heartbeat
        if heartbeat_age > timedelta(seconds=self.health_check_interval * 2):
            health_issues.append(f"No heartbeat for {heartbeat_age.total_seconds():.1f}s")

    # Data silence, only once data has been seen
    last_data = self._last_data_received
    if last_data:
        data_age = now - last_data
        if data_age > self._max_silence_duration:
            health_issues.append(f"No data for {data_age.total_seconds():.1f}s")

    # Status that already marks a failure
    if self.status in (CollectorStatus.ERROR, CollectorStatus.UNHEALTHY):
        health_issues.append(f"Status: {self.status.value}")

    return {
        'is_healthy': not health_issues,
        'issues': health_issues,
        'status': self.status.value,
        'last_heartbeat': heartbeat.isoformat() if heartbeat else None,
        'last_data_received': last_data.isoformat() if last_data else None,
        'should_be_running': self._should_be_running,
        'is_running': self._running
    }
def add_symbol(self, symbol: str) -> None:
    """
    Add a symbol to the collector's symbol set.

    Adding a symbol that is already present does nothing (and logs nothing).

    Args:
        symbol: Trading symbol to add
    """
    if symbol in self.symbols:
        return
    self.symbols.add(symbol)
    self.logger.info(f"Added symbol: {symbol}")
def remove_symbol(self, symbol: str) -> None:
    """
    Remove a symbol from the collector's symbol set.

    Removing a symbol that is not present does nothing (and logs nothing).

    Args:
        symbol: Trading symbol to remove
    """
    if symbol not in self.symbols:
        return
    self.symbols.remove(symbol)
    self.logger.info(f"Removed symbol: {symbol}")
def validate_ohlcv_data(self, data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
    """
    Validate and convert raw OHLCV data to standardized format.

    Timestamps may be a number (interpreted as Unix milliseconds), an ISO-8601
    string (a trailing 'Z' is accepted) or a datetime.

    Args:
        data: Raw OHLCV data dictionary
        symbol: Trading symbol
        timeframe: Timeframe (e.g., '1m', '5m', '1h')

    Returns:
        Validated OHLCVData object

    Raises:
        DataValidationError: If a required field is missing, the timestamp
            has an unsupported type or is out of range, or a price/volume
            cannot be parsed as a number
    """
    # Function-scope import: the module header only imports Decimal.
    from decimal import InvalidOperation

    required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']

    # Check required fields
    for field in required_fields:
        if field not in data:
            raise DataValidationError(f"Missing required field: {field}")

    try:
        # Parse timestamp
        timestamp = data['timestamp']
        if isinstance(timestamp, bool):
            # bool is an int subclass; True/False is never a real timestamp.
            raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")
        if isinstance(timestamp, (int, float)):
            # Assume Unix timestamp in milliseconds
            timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
        elif isinstance(timestamp, str):
            timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        elif not isinstance(timestamp, datetime):
            raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")

        return OHLCVData(
            symbol=symbol,
            timeframe=timeframe,
            timestamp=timestamp,
            open=Decimal(str(data['open'])),
            high=Decimal(str(data['high'])),
            low=Decimal(str(data['low'])),
            close=Decimal(str(data['close'])),
            volume=Decimal(str(data['volume'])),
            trades_count=data.get('trades_count')
        )

    except (ValueError, TypeError, KeyError, InvalidOperation, OverflowError, OSError) as e:
        # Decimal("abc") raises InvalidOperation (an ArithmeticError, not a
        # ValueError) and fromtimestamp() can raise OverflowError/OSError for
        # out-of-range values; all of them are validation failures here.
        raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}") from e
def __repr__(self) -> str:
    """Return a short description: class name, exchange, symbol count and status."""
    class_name = self.__class__.__name__
    symbol_count = len(self.symbols)
    state = self.status.value
    return f"<{class_name}({self.exchange_name}, {symbol_count} symbols, {state})>"

529
data/collector_manager.py Normal file
View File

@ -0,0 +1,529 @@
"""
Data Collector Manager for supervising and managing multiple data collectors.
This module provides centralized management of data collectors with health monitoring,
auto-recovery, and coordinated lifecycle management.
"""
import asyncio
import time
from datetime import datetime, timezone, timedelta
from typing import Dict, List, Optional, Any, Set
from dataclasses import dataclass
from enum import Enum
from utils.logger import get_logger
from .base_collector import BaseDataCollector, CollectorStatus
class ManagerStatus(Enum):
    """Lifecycle states of the collector manager."""

    STOPPED = "stopped"
    STARTING = "starting"
    RUNNING = "running"
    STOPPING = "stopping"
    ERROR = "error"
@dataclass
class CollectorConfig:
    """Per-collector settings kept by the manager alongside each collector."""

    name: str
    exchange: str
    symbols: List[str]
    data_types: List[str]  # data type values as strings
    auto_restart: bool = True
    health_check_interval: float = 30.0  # seconds
    enabled: bool = True
class CollectorManager:
    """
    Supervises a set of data collectors.

    Responsibilities:
    - Starting and stopping collectors
    - Health monitoring and auto-restart
    - Coordinated lifecycle management
    - Status reporting and metrics
    """

    def __init__(self,
                 manager_name: str = "collector_manager",
                 global_health_check_interval: float = 60.0,
                 restart_delay: float = 5.0):
        """
        Set up manager state; no collectors are started here.

        Args:
            manager_name: Name for logging
            global_health_check_interval: Seconds between global health checks
            restart_delay: Delay between restart attempts
        """
        # Configuration
        self.manager_name = manager_name
        self.global_health_check_interval = global_health_check_interval
        self.restart_delay = restart_delay

        # Logging
        self.logger = get_logger("data_collector_manager", verbose=True)

        # Lifecycle state
        self.status = ManagerStatus.STOPPED
        self._running = False
        self._tasks: Set[asyncio.Task] = set()

        # Registry: collectors and their configs share the same keys;
        # _enabled_collectors holds the names that start() will launch.
        self._collectors: Dict[str, BaseDataCollector] = {}
        self._collector_configs: Dict[str, CollectorConfig] = {}
        self._enabled_collectors: Set[str] = set()

        # Health monitoring
        self._last_global_check = datetime.now(timezone.utc)
        self._global_health_task = None

        # Counters and timestamps reported by get_status().
        self._stats = dict(
            total_collectors=0,
            running_collectors=0,
            failed_collectors=0,
            restarts_performed=0,
            last_global_check=None,
            uptime_start=None,
        )

        self.logger.info(f"Initialized collector manager: {manager_name}")
def add_collector(self,
                  collector: BaseDataCollector,
                  config: Optional[CollectorConfig] = None) -> None:
    """
    Register a collector with the manager.

    The collector is stored under a generated name made of its exchange name
    and a time-derived numeric suffix. A default config mirroring the
    collector's own settings is created when none is given.

    Args:
        collector: Data collector instance
        config: Optional configuration (will create default if not provided)
    """
    # Time-derived suffix to make name clashes unlikely ...
    suffix = int(time.time() * 1000000) % 1000000
    base_name = f"{collector.exchange_name}_{suffix}"

    # ... and a numeric counter to resolve the clashes that still occur.
    collector_name = base_name
    attempt = 1
    while collector_name in self._collectors:
        collector_name = f"{base_name}_{attempt}"
        attempt += 1

    if config is None:
        config = CollectorConfig(
            name=collector_name,
            exchange=collector.exchange_name,
            symbols=list(collector.symbols),
            data_types=[dt.value for dt in collector.data_types],
            auto_restart=collector.auto_restart,
            health_check_interval=collector.health_check_interval
        )

    self._collectors[collector_name] = collector
    self._collector_configs[collector_name] = config
    if config.enabled:
        self._enabled_collectors.add(collector_name)

    self._stats['total_collectors'] = len(self._collectors)

    self.logger.info(f"Added collector: {collector_name} ({collector.exchange_name}) - "
                     f"Symbols: {', '.join(collector.symbols)} - Enabled: {config.enabled}")
def remove_collector(self, collector_name: str) -> bool:
    """
    Remove a collector from management.

    If the collector is not already stopped and an event loop is running, a
    forced stop is scheduled on that loop. Without a running loop the
    collector is removed without being stopped.

    Args:
        collector_name: Name of the collector to remove

    Returns:
        True if removed successfully, False if not found
    """
    if collector_name not in self._collectors:
        self.logger.warning(f"Collector not found: {collector_name}")
        return False

    collector = self._collectors[collector_name]
    if collector.status != CollectorStatus.STOPPED:
        # Look up the loop BEFORE creating the coroutine: creating it first
        # and then failing to schedule it leaves a never-awaited coroutine.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop running, just log
            self.logger.info(f"Collector {collector_name} will be removed without stopping (no event loop)")
        else:
            # Deliberately not added to self._tasks: manager stop() cancels
            # those tasks and no longer knows this collector afterwards.
            loop.create_task(collector.stop(force=True))

    # Remove from management
    del self._collectors[collector_name]
    del self._collector_configs[collector_name]
    self._enabled_collectors.discard(collector_name)

    self._stats['total_collectors'] = len(self._collectors)

    self.logger.info(f"Removed collector: {collector_name}")
    return True
def enable_collector(self, collector_name: str) -> bool:
    """
    Enable a collector (will be started if manager is running).

    When the manager is running and an event loop is available, the start is
    scheduled as a task tracked in self._tasks. Otherwise the collector is
    only marked as enabled.

    Args:
        collector_name: Name of the collector to enable

    Returns:
        True if enabled successfully, False if not found
    """
    if collector_name not in self._collectors:
        self.logger.warning(f"Collector not found: {collector_name}")
        return False

    self._enabled_collectors.add(collector_name)
    self._collector_configs[collector_name].enabled = True

    if self._running:
        # Look up the loop BEFORE creating the coroutine: creating it first
        # and then failing to schedule it leaves a never-awaited coroutine.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop running, will be started when manager starts
            self.logger.debug(f"Collector {collector_name} enabled but will start when manager starts")
        else:
            # Keep a reference so the task cannot be garbage-collected
            # mid-flight and so the manager's stop() can cancel it.
            start_task = loop.create_task(self._start_collector(collector_name))
            self._tasks.add(start_task)
            start_task.add_done_callback(self._tasks.discard)

    self.logger.info(f"Enabled collector: {collector_name}")
    return True
def disable_collector(self, collector_name: str) -> bool:
    """
    Disable a collector (will be stopped if running).

    When an event loop is running, a forced stop is scheduled as a task
    tracked in self._tasks. Without a loop the collector is only marked as
    disabled.

    Args:
        collector_name: Name of the collector to disable

    Returns:
        True if disabled successfully, False if not found
    """
    if collector_name not in self._collectors:
        self.logger.warning(f"Collector not found: {collector_name}")
        return False

    self._enabled_collectors.discard(collector_name)
    self._collector_configs[collector_name].enabled = False

    collector = self._collectors[collector_name]
    # Look up the loop BEFORE creating the coroutine: creating it first and
    # then failing to schedule it leaves a never-awaited coroutine.
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No event loop running, just log
        self.logger.debug(f"Collector {collector_name} disabled but cannot stop (no event loop)")
    else:
        # Keep a reference so the task cannot be garbage-collected mid-flight.
        # The collector stays registered, so if the manager's stop() cancels
        # this task it still stops the collector itself afterwards.
        stop_task = loop.create_task(collector.stop(force=True))
        self._tasks.add(stop_task)
        stop_task.add_done_callback(self._tasks.discard)

    self.logger.info(f"Disabled collector: {collector_name}")
    return True
async def start(self) -> bool:
    """
    Start the manager: launch every enabled collector, then the global health monitor.

    Returns:
        True if started successfully (or already running/starting), False otherwise
    """
    if self.status in (ManagerStatus.RUNNING, ManagerStatus.STARTING):
        self.logger.warning("Collector manager is already running or starting")
        return True

    self.logger.info("Starting collector manager")
    self.status = ManagerStatus.STARTING

    try:
        self._running = True
        self._stats['uptime_start'] = datetime.now(timezone.utc)

        # One start task per enabled collector, all running concurrently.
        start_tasks = [
            asyncio.create_task(self._start_collector(name))
            for name in self._enabled_collectors
        ]

        # Give the collectors a bounded amount of time to come up.
        if start_tasks:
            startup = asyncio.gather(*start_tasks, return_exceptions=True)
            try:
                await asyncio.wait_for(startup, timeout=30.0)
            except asyncio.TimeoutError:
                self.logger.warning("Some collectors took too long to start")

        # Global health monitoring runs as a tracked background task.
        monitor = asyncio.create_task(self._global_health_monitor())
        self._tasks.add(monitor)
        monitor.add_done_callback(self._tasks.discard)

        self.status = ManagerStatus.RUNNING
        self.logger.info(f"Collector manager started - Managing {len(self._enabled_collectors)} collectors")
        return True

    except Exception as e:
        self.status = ManagerStatus.ERROR
        self.logger.error(f"Failed to start collector manager: {e}")
        return False
async def stop(self) -> None:
    """Stop the manager: cancel its own tasks, then force-stop every registered collector."""
    if self.status == ManagerStatus.STOPPED:
        self.logger.warning("Collector manager is already stopped")
        return

    self.logger.info("Stopping collector manager")
    self.status = ManagerStatus.STOPPING
    self._running = False

    try:
        # Manager-owned background tasks go first.
        for task in list(self._tasks):
            task.cancel()
        if self._tasks:
            await asyncio.gather(*self._tasks, return_exceptions=True)

        # Then all collectors (enabled or not), concurrently.
        stop_tasks = [
            asyncio.create_task(collector.stop(force=True))
            for collector in self._collectors.values()
        ]

        # Give the collectors a bounded amount of time to shut down.
        if stop_tasks:
            shutdown = asyncio.gather(*stop_tasks, return_exceptions=True)
            try:
                await asyncio.wait_for(shutdown, timeout=30.0)
            except asyncio.TimeoutError:
                self.logger.warning("Some collectors took too long to stop")

        self.status = ManagerStatus.STOPPED
        self.logger.info("Collector manager stopped")

    except Exception as e:
        self.status = ManagerStatus.ERROR
        self.logger.error(f"Error stopping collector manager: {e}")
async def restart_collector(self, collector_name: str) -> bool:
    """
    Restart one managed collector by name.

    Args:
        collector_name: Name of the collector to restart

    Returns:
        True if restarted successfully, False otherwise (including unknown names)
    """
    collector = self._collectors.get(collector_name) if collector_name in self._collectors else None
    if collector_name not in self._collectors:
        self.logger.warning(f"Collector not found: {collector_name}")
        return False

    self.logger.info(f"Restarting collector: {collector_name}")

    try:
        success = await collector.restart()
    except Exception as e:
        self.logger.error(f"Error restarting collector {collector_name}: {e}")
        return False

    if not success:
        self.logger.error(f"Failed to restart collector: {collector_name}")
        return success

    # Only successful restarts are counted.
    self._stats['restarts_performed'] += 1
    self.logger.info(f"Successfully restarted collector: {collector_name}")
    return success
async def _start_collector(self, collector_name: str) -> bool:
    """
    Start one managed collector by name.

    Args:
        collector_name: Name of the collector to start

    Returns:
        True if started successfully, False otherwise (including unknown names)
    """
    if collector_name not in self._collectors:
        self.logger.warning(f"Collector not found: {collector_name}")
        return False

    target = self._collectors[collector_name]

    try:
        success = await target.start()
    except Exception as e:
        self.logger.error(f"Error starting collector {collector_name}: {e}")
        return False

    if success:
        self.logger.info(f"Started collector: {collector_name}")
    else:
        self.logger.error(f"Failed to start collector: {collector_name}")
    return success
async def _global_health_monitor(self) -> None:
    """
    Global health monitoring for all enabled collectors.

    Every global_health_check_interval seconds each enabled collector is
    asked for its health status. Unhealthy collectors with auto_restart set
    (and not currently starting/stopping) get a restart scheduled. The
    running/failed counters in self._stats are refreshed after each pass.
    """
    self.logger.debug("Starting global health monitor")

    while self._running:
        try:
            await asyncio.sleep(self.global_health_check_interval)

            self._last_global_check = datetime.now(timezone.utc)
            self._stats['last_global_check'] = self._last_global_check

            running_count = 0
            failed_count = 0

            # Iterate over a snapshot and tolerate names that disappeared,
            # so enabling/removing collectors cannot break a pass.
            for collector_name in list(self._enabled_collectors):
                collector = self._collectors.get(collector_name)
                if collector is None:
                    continue

                health_status = collector.get_health_status()

                if health_status['is_healthy'] and collector.status == CollectorStatus.RUNNING:
                    running_count += 1
                elif not health_status['is_healthy']:
                    failed_count += 1
                    self.logger.warning(f"Collector {collector_name} is unhealthy: {health_status['issues']}")

                    # Auto-restart if needed and not already restarting
                    if (collector.auto_restart and
                            collector.status not in [CollectorStatus.STARTING, CollectorStatus.STOPPING]):
                        self.logger.info(f"Auto-restarting unhealthy collector: {collector_name}")
                        # Track the task: an untracked restart could bring a
                        # collector back up after the manager's stop(), and
                        # the loop holds tasks only weakly.
                        restart_task = asyncio.create_task(self.restart_collector(collector_name))
                        self._tasks.add(restart_task)
                        restart_task.add_done_callback(self._tasks.discard)

            # Update global statistics
            self._stats['running_collectors'] = running_count
            self._stats['failed_collectors'] = failed_count

            self.logger.debug(f"Health check complete - Running: {running_count}, Failed: {failed_count}")

        except asyncio.CancelledError:
            self.logger.debug("Global health monitor cancelled")
            break
        except Exception as e:
            self.logger.error(f"Error in global health monitor: {e}")
            await asyncio.sleep(self.global_health_check_interval)
def get_status(self) -> Dict[str, Any]:
    """
    Get manager status and statistics.

    Returns:
        Dictionary with the manager status value, uptime in seconds
        (None when no uptime start is recorded), a snapshot of the
        statistics, per-collector status/enabled/health entries, the list
        of enabled collector names, and the total collector count.
    """
    started_at = self._stats['uptime_start']
    uptime_seconds = None
    if started_at:
        uptime_seconds = (datetime.now(timezone.utc) - started_at).total_seconds()

    # Get individual collector statuses
    collector_statuses = {
        name: {
            'status': collector.status.value,
            'enabled': name in self._enabled_collectors,
            'health': collector.get_health_status()
        }
        for name, collector in self._collectors.items()
    }

    return {
        'manager_status': self.status.value,
        'uptime_seconds': uptime_seconds,
        # Shallow copy: callers receive a snapshot and cannot overwrite
        # the manager's own counters through the returned dictionary.
        'statistics': dict(self._stats),
        'collectors': collector_statuses,
        'enabled_collectors': list(self._enabled_collectors),
        'total_collectors': len(self._collectors)
    }
def get_collector_status(self, collector_name: str) -> Optional[Dict[str, Any]]:
    """
    Look up the detailed status of one managed collector.

    Args:
        collector_name: Name of the collector

    Returns:
        Dict with the collector's name, its stored config attributes, its
        own status report and its health report, or None when no collector
        with that name is managed
    """
    if collector_name in self._collectors:
        target = self._collectors[collector_name]
        stored_config = self._collector_configs[collector_name]
        return {
            'name': collector_name,
            'config': stored_config.__dict__,
            'status': target.get_status(),
            'health': target.get_health_status()
        }

    return None
def list_collectors(self) -> List[str]:
    """
    Return the names of every collector registered with this manager.

    Returns:
        List of collector names
    """
    # Iterating a dict yields its keys.
    return list(self._collectors)
def get_running_collectors(self) -> List[str]:
    """
    Return the names of collectors whose status is RUNNING.

    Returns:
        List of running collector names
    """
    return [
        name
        for name, collector in self._collectors.items()
        if collector.status == CollectorStatus.RUNNING
    ]
def get_failed_collectors(self) -> List[str]:
    """
    Return the names of collectors whose health report is not healthy.

    Returns:
        List of failed collector names
    """
    return [
        name
        for name, collector in self._collectors.items()
        if not collector.get_health_status()['is_healthy']
    ]
async def restart_all_collectors(self) -> Dict[str, bool]:
    """
    Restart every enabled collector concurrently.

    Returns:
        Dictionary mapping collector names to restart success status;
        a restart that raises is logged and recorded as False
    """
    self.logger.info("Restarting all enabled collectors")

    outcome = {}

    # Launch all restarts up front so they run concurrently.
    launched = [
        (name, asyncio.create_task(self.restart_collector(name)))
        for name in self._enabled_collectors
    ]

    # Collect the results one by one.
    for name, restart_task in launched:
        try:
            outcome[name] = await restart_task
        except Exception as exc:
            self.logger.error(f"Error restarting {name}: {exc}")
            outcome[name] = False

    succeeded = sum(1 for ok in outcome.values() if ok)
    self.logger.info(f"Restart complete - {succeeded}/{len(outcome)} collectors restarted successfully")

    return outcome
def __repr__(self) -> str:
"""String representation of the manager."""
return f"<CollectorManager({self.manager_name}, {len(self._collectors)} collectors, {self.status.value})>"

228
docs/README.md Normal file
View File

@ -0,0 +1,228 @@
# TCP Dashboard Documentation
Welcome to the **TCP Dashboard** (Trading Crypto Platform) documentation. This platform provides a comprehensive solution for cryptocurrency trading bot development, backtesting, and portfolio management.
## 📚 Documentation Index
### 🏗️ **Architecture & Design**
- **[Architecture Overview](architecture.md)** - High-level system architecture and component design
- **[Project Specification](specification.md)** - Technical specifications and requirements
- **[Crypto Bot PRD](crypto-bot-prd.md)** - Product Requirements Document for the crypto trading bot platform
### 🚀 **Setup & Installation**
- **[Setup Guide](setup.md)** - Comprehensive setup instructions for new machines and environments
  - Environment configuration
  - Database setup with Docker
  - Development workflow
  - Production deployment
### 🔧 **Core Systems**
#### Data Collection System
- **[Data Collectors Documentation](data_collectors.md)** - *Comprehensive guide to the enhanced data collector system*
  - **BaseDataCollector** abstract class with health monitoring
  - **CollectorManager** for centralized management
  - Auto-restart and failure recovery
  - Health monitoring and alerting
  - Performance optimization
  - Integration examples
  - Troubleshooting guide
#### Logging System
- **[Enhanced Logging System](logging.md)** - Unified logging framework
  - Multi-level logging with automatic cleanup
  - Console and file output with formatting
  - Performance monitoring
  - Integration across all components
## 🎯 **Quick Start**
1. **New to the platform?** Start with the [Setup Guide](setup.md)
2. **Implementing data collectors?** See [Data Collectors Documentation](data_collectors.md)
3. **Understanding the architecture?** Read [Architecture Overview](architecture.md)
4. **Troubleshooting?** Check component-specific documentation
## 🏛️ **System Components**
### Core Infrastructure
- **Database Layer**: PostgreSQL with SQLAlchemy models
- **Real-time Messaging**: Redis pub/sub for data distribution
- **Configuration Management**: Pydantic-based settings
- **Containerization**: Docker and docker-compose setup
### Data Collection & Processing
- **Abstract Base Collectors**: Standardized interface for all exchange connectors
- **Health Monitoring**: Automatic failure detection and recovery
- **Data Validation**: Comprehensive validation for market data
- **Multi-Exchange Support**: OKX, Binance, and extensible framework
### Trading & Strategy Engine
- **Strategy Framework**: Base strategy classes and implementations
- **Bot Management**: Lifecycle management with JSON configuration
- **Backtesting Engine**: Historical strategy testing with performance metrics
- **Portfolio Management**: Virtual trading with P&L tracking
### User Interface
- **Dashboard**: Dash-based web interface with Mantine UI
- **Real-time Charts**: Interactive price charts with technical indicators
- **Bot Controls**: Start/stop/configure trading bots
- **Performance Analytics**: Portfolio visualization and trade analytics
## 📋 **Task Progress**
The platform follows a structured development approach with clearly defined tasks:
- ✅ **Database Foundation** - Complete
- ✅ **Enhanced Data Collectors** - Complete with health monitoring
- ⏳ **Market Data Collection** - In progress (OKX connector next)
- ⏳ **Basic Dashboard** - Planned
- ⏳ **Strategy Engine** - Planned
- ⏳ **Advanced Features** - Planned
For detailed task tracking, see [tasks/tasks-crypto-bot-prd.md](../tasks/tasks-crypto-bot-prd.md).
## 🛠️ **Development Workflow**
### Setting Up Development Environment
```bash
# Clone and setup
git clone <repository>
cd TCPDashboard
# Install dependencies with UV
uv sync
# Setup environment
cp env.template .env
# Edit .env with your configuration
# Start services
docker-compose up -d
# Initialize database
uv run python scripts/init_database.py
# Run tests
uv run pytest
```
### Key Development Tools
- **UV**: Modern Python package management
- **pytest**: Testing framework with async support
- **SQLAlchemy**: Database ORM with migration support
- **Dash + Mantine**: Modern web UI framework
- **Docker**: Containerized development environment
## 🔍 **Testing**
The platform includes comprehensive test coverage:
- **Unit Tests**: Individual component testing
- **Integration Tests**: Cross-component functionality
- **Performance Tests**: Load and stress testing
- **End-to-End Tests**: Full system workflows
```bash
# Run all tests
uv run pytest
# Run specific test files
uv run pytest tests/test_base_collector.py
uv run pytest tests/test_collector_manager.py
# Run with coverage
uv run pytest --cov=data --cov-report=html
```
## 📊 **Monitoring & Observability**
### Logging
- **Structured Logging**: JSON-formatted logs with automatic cleanup
- **Multiple Levels**: Debug, Info, Warning, Error with configurable output
- **Component Isolation**: Separate loggers for different system components
### Health Monitoring
- **Collector Health**: Real-time status and performance metrics
- **Auto-Recovery**: Automatic restart on failures
- **Performance Tracking**: Message rates, uptime, error rates
### Metrics Integration
- **Prometheus Support**: Built-in metrics collection
- **Custom Dashboards**: System performance visualization
- **Alerting**: Configurable alerts for system health
## 🔐 **Security & Best Practices**
### Configuration Management
- **Environment Variables**: All sensitive data via `.env` files
- **No Hardcoded Secrets**: Clean separation of configuration and code
- **Validation**: Pydantic-based configuration validation
### Data Handling
- **Input Validation**: Comprehensive validation for all external data
- **Error Handling**: Robust error handling with proper logging
- **Resource Management**: Proper cleanup and resource management
### Code Quality
- **Type Hints**: Full type annotation coverage
- **Documentation**: Comprehensive docstrings and comments
- **Testing**: High test coverage with multiple test types
- **Code Standards**: Consistent formatting and patterns
## 🤝 **Contributing**
### Development Guidelines
1. Follow existing code patterns and architecture
2. Add comprehensive tests for new functionality
3. Update documentation for API changes
4. Use type hints and proper error handling
5. Follow the existing logging patterns
### Code Review Process
1. Create feature branches from main
2. Write tests before implementing features
3. Ensure all tests pass and maintain coverage
4. Update relevant documentation
5. Submit pull requests with clear descriptions
## 📞 **Support**
### Getting Help
1. **Documentation**: Check relevant component documentation
2. **Logs**: Review system logs in `./logs/` directory
3. **Status**: Use built-in status and health check methods
4. **Tests**: Run test suite to verify system integrity
### Common Issues
- **Database Connection**: Check Docker services and environment variables
- **Collector Failures**: Review collector health status and logs
- **Performance Issues**: Monitor system resources and optimize accordingly
---
## 📁 **File Structure**
```
TCPDashboard/
├── docs/ # Documentation (you are here)
├── data/ # Data collection system
├── database/ # Database models and utilities
├── utils/ # Shared utilities (logging, etc.)
├── tests/ # Test suite
├── examples/ # Usage examples
├── config/ # Configuration files
├── logs/ # Application logs
└── scripts/ # Utility scripts
```
---
*Last updated: 2025-05-30*
For the most current information, refer to the individual component documentation linked above.

1159
docs/data_collectors.md Normal file

File diff suppressed because it is too large Load Diff

View File

@ -35,4 +35,15 @@ DEFAULT_VIRTUAL_BALANCE=10000
# Data Configuration
MARKET_DATA_SYMBOLS=BTC-USDT,ETH-USDT,LTC-USDT
HISTORICAL_DATA_DAYS=30
CHART_UPDATE_INTERVAL=2000 # milliseconds
CHART_UPDATE_INTERVAL=2000 # milliseconds
# Logging
VERBOSE_LOGGING=true
LOG_CLEANUP=true # Enable automatic log cleanup
LOG_MAX_FILES=30 # Maximum log files to retain
# Health monitoring
DEFAULT_HEALTH_CHECK_INTERVAL=30 # Default health check interval (seconds)
MAX_SILENCE_DURATION=300 # Max time without data (seconds)
MAX_RECONNECT_ATTEMPTS=5 # Maximum reconnection attempts
RECONNECT_DELAY=5 # Delay between reconnect attempts (seconds)

309
examples/collector_demo.py Normal file
View File

@ -0,0 +1,309 @@
"""
Demonstration of the enhanced data collector system with health monitoring and auto-restart.
This example shows how to:
1. Create data collectors with health monitoring
2. Use the collector manager for coordinated management
3. Monitor collector health and handle failures
4. Enable/disable collectors dynamically
"""
import asyncio
from datetime import datetime, timezone
from typing import Any, Optional
from data import (
BaseDataCollector, DataType, CollectorStatus, MarketDataPoint,
CollectorManager, CollectorConfig
)
class DemoDataCollector(BaseDataCollector):
    """
    Simulated exchange collector used by the demonstrations.

    It fabricates one ticker message per symbol on every pass and can be
    configured to raise an exception every N processed messages so the
    failure-handling path can be observed.
    """

    def __init__(self,
                 exchange_name: str,
                 symbols: list,
                 fail_every_n_messages: int = 0,
                 connection_delay: float = 0.1):
        """
        Initialize demo collector.

        Args:
            exchange_name: Name of the exchange
            symbols: Trading symbols to collect
            fail_every_n_messages: Simulate failure every N messages (0 = no failures)
            connection_delay: Simulated connection delay
        """
        super().__init__(exchange_name, symbols, [DataType.TICKER])

        # Simulation settings
        self.fail_every_n_messages = fail_every_n_messages
        self.connection_delay = connection_delay

        # Simulated connection state
        self.message_count = 0
        self.connected = False
        self.subscribed = False

    async def connect(self) -> bool:
        """Pretend to connect: wait ``connection_delay`` seconds, then succeed."""
        print(f"[{self.exchange_name}] Connecting...")
        await asyncio.sleep(self.connection_delay)
        self.connected = True
        print(f"[{self.exchange_name}] Connected successfully")
        return True

    async def disconnect(self) -> None:
        """Pretend to disconnect and clear the connected/subscribed flags."""
        print(f"[{self.exchange_name}] Disconnecting...")
        await asyncio.sleep(self.connection_delay / 2)
        self.connected = False
        self.subscribed = False
        print(f"[{self.exchange_name}] Disconnected")

    async def subscribe_to_data(self, symbols: list, data_types: list) -> bool:
        """Pretend to subscribe; returns False when not connected."""
        if not self.connected:
            return False

        print(f"[{self.exchange_name}] Subscribing to {len(symbols)} symbols: {', '.join(symbols)}")
        await asyncio.sleep(0.05)
        self.subscribed = True
        return True

    async def unsubscribe_from_data(self, symbols: list, data_types: list) -> bool:
        """Pretend to unsubscribe; always reports success."""
        print(f"[{self.exchange_name}] Unsubscribing from data streams")
        self.subscribed = False
        return True

    async def _process_message(self, message: Any) -> Optional[MarketDataPoint]:
        """
        Turn a simulated message into a ticker data point.

        Raises an Exception on every ``fail_every_n_messages``-th message
        when that setting is greater than zero.
        """
        self.message_count += 1

        # Simulate periodic failures if configured
        failure_due = (
            self.fail_every_n_messages > 0
            and self.message_count % self.fail_every_n_messages == 0
        )
        if failure_due:
            raise Exception(f"Simulated failure after {self.message_count} messages")

        # Build the mock market data point
        return MarketDataPoint(
            exchange=self.exchange_name,
            symbol=message['symbol'],
            timestamp=datetime.now(timezone.utc),
            data_type=DataType.TICKER,
            data={
                'price': message['price'],
                'volume': message.get('volume', 100),
                'timestamp': datetime.now(timezone.utc).isoformat()
            }
        )

    async def _handle_messages(self) -> None:
        """Fabricate and process one message per symbol, then wait a second."""
        if not (self.connected and self.subscribed):
            await asyncio.sleep(0.1)
            return

        # Any exception from processing propagates to the caller unchanged.
        for symbol in self.symbols:
            fake_message = {
                'symbol': symbol,
                'price': 50000 + (self.message_count % 1000),  # Fake price that changes
                'volume': 1.5
            }

            point = await self._process_message(fake_message)
            if point:
                self._stats['messages_processed'] += 1
                await self._notify_callbacks(point)

        # Simulate processing delay
        await asyncio.sleep(1.0)
async def data_callback(data_point: MarketDataPoint):
    """Print a one-line summary of a received data point."""
    exchange = data_point.exchange
    symbol = data_point.symbol
    price = data_point.data.get('price')
    received_at = data_point.timestamp.strftime('%H:%M:%S')
    print(f"📊 Data received: {exchange} - {symbol} - Price: {price} at {received_at}")
async def monitor_collectors(manager: CollectorManager, duration: int = 30):
    """
    Poll the manager once per second for ``duration`` seconds.

    A status line is printed on the first poll and every fifth poll after it.
    """
    print(f"\n🔍 Starting monitoring for {duration} seconds...")

    for second in range(duration):
        await asyncio.sleep(1)

        snapshot = manager.get_status()
        running_count = len(manager.get_running_collectors())
        failed_count = len(manager.get_failed_collectors())

        if second % 5 != 0:
            continue

        # Print status every 5 seconds
        restarts = snapshot['statistics']['restarts_performed']
        print(f"⏰ Status at {second+1}s: {running_count} running, {failed_count} failed, "
              f"{restarts} restarts")

    print("🏁 Monitoring complete")
async def demo_basic_usage():
    """Run a single collector for a few seconds and report its statistics."""
    print("=" * 60)
    print("🚀 Demo 1: Basic Data Collector Usage")
    print("=" * 60)

    # A collector with no simulated failures
    collector = DemoDataCollector("demo_exchange", ["BTC-USDT", "ETH-USDT"])
    collector.add_data_callback(DataType.TICKER, data_callback)

    print("Starting collector...")
    started = await collector.start()
    if not started:
        print("❌ Failed to start collector")
        return

    print("✅ Collector started successfully")

    # Let it collect for a few seconds
    await asyncio.sleep(5)

    # Report what it did
    status = collector.get_status()
    print(f"📈 Messages processed: {status['statistics']['messages_processed']}")
    print(f"⏱️ Uptime: {status['statistics']['uptime_seconds']:.1f}s")

    await collector.stop()
    print("✅ Collector stopped")
async def demo_manager_usage():
    """Run a stable and a periodically failing collector under one manager."""
    print("\n" + "=" * 60)
    print("🎛️ Demo 2: Collector Manager Usage")
    print("=" * 60)

    manager = CollectorManager("demo_manager", global_health_check_interval=3.0)

    # One collector that never fails and one that fails every 5 messages
    stable_collector = DemoDataCollector("stable_exchange", ["BTC-USDT"])
    failing_collector = DemoDataCollector("failing_exchange", ["ETH-USDT"],
                                          fail_every_n_messages=5)  # Fails every 5 messages

    # Attach the print callback to both before registering them
    for demo_collector in (stable_collector, failing_collector):
        demo_collector.add_data_callback(DataType.TICKER, data_callback)

    manager.add_collector(stable_collector)
    manager.add_collector(failing_collector)

    print(f"📝 Added {len(manager.list_collectors())} collectors to manager")

    started = await manager.start()
    if not started:
        print("❌ Failed to start manager")
        return

    print("✅ Manager started successfully")

    # Watch the collectors for a while
    await monitor_collectors(manager, duration=15)

    # Final report
    status = manager.get_status()
    print(f"\n📊 Final Statistics:")
    print(f" - Total restarts: {status['statistics']['restarts_performed']}")
    print(f" - Running collectors: {len(manager.get_running_collectors())}")
    print(f" - Failed collectors: {len(manager.get_failed_collectors())}")

    await manager.stop()
    print("✅ Manager stopped")
async def demo_dynamic_management():
    """
    Demonstrate dynamic collector management.

    Starts a manager with one collector, adds a second while it is running,
    then disables and re-enables the first. The manager is always stopped
    once it has started, even if one of the steps raises.
    """
    print("\n" + "=" * 60)
    print("🔄 Demo 3: Dynamic Collector Management")
    print("=" * 60)

    # Create manager
    manager = CollectorManager("dynamic_manager", global_health_check_interval=2.0)

    # Start with one collector
    collector1 = DemoDataCollector("exchange_1", ["BTC-USDT"])
    collector1.add_data_callback(DataType.TICKER, data_callback)
    manager.add_collector(collector1)

    # Check the start result, as demo_manager_usage does, instead of
    # reporting success unconditionally.
    if not await manager.start():
        print("❌ Failed to start manager")
        return

    try:
        print("✅ Started with 1 collector")
        await asyncio.sleep(3)

        # Add second collector
        collector2 = DemoDataCollector("exchange_2", ["ETH-USDT"])
        collector2.add_data_callback(DataType.TICKER, data_callback)
        manager.add_collector(collector2)
        print(" Added second collector")
        await asyncio.sleep(3)

        # Disable first collector
        collector_names = manager.list_collectors()
        manager.disable_collector(collector_names[0])
        print("⏸️ Disabled first collector")
        await asyncio.sleep(3)

        # Re-enable first collector
        manager.enable_collector(collector_names[0])
        print("▶️ Re-enabled first collector")
        await asyncio.sleep(3)

        # Show final status
        print(f"📊 Final state: {len(manager.get_running_collectors())} running collectors")
    finally:
        # Stop the manager even when a step above failed.
        await manager.stop()

    print("✅ Dynamic demo complete")
async def main():
    """Run the three demonstrations in order, printing a traceback on failure."""
    print("🎯 Data Collector System Demonstration")
    print("This demo shows health monitoring and auto-restart capabilities\n")

    try:
        # Each demo must finish before the next one starts.
        for demo in (demo_basic_usage, demo_manager_usage, demo_dynamic_management):
            await demo()

        divider = "=" * 60
        print("\n" + divider)
        print("🎉 All demonstrations completed successfully!")
        print(divider)

    except Exception as exc:
        print(f"❌ Demo failed with error: {exc}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(main())

View File

@ -0,0 +1,412 @@
"""
Demonstration of running multiple data collectors in parallel.
This example shows how to set up and manage multiple collectors simultaneously,
each collecting data from different exchanges or different symbols.
"""
import asyncio
from datetime import datetime, timezone
from typing import Dict, Any
from data import (
BaseDataCollector, DataType, CollectorStatus, MarketDataPoint,
CollectorManager, CollectorConfig
)
class DemoExchangeCollector(BaseDataCollector):
    """Simulated exchange collector that emits ticker data at a fixed interval."""

    def __init__(self,
                 exchange_name: str,
                 symbols: list,
                 message_interval: float = 1.0,
                 base_price: float = 50000):
        """
        Initialize demo collector.

        Args:
            exchange_name: Name of the exchange (okx, binance, coinbase, etc.)
            symbols: Trading symbols to collect
            message_interval: Seconds between simulated messages
            base_price: Base price for simulation
        """
        super().__init__(exchange_name, symbols, [DataType.TICKER])

        # Simulation settings
        self.message_interval = message_interval
        self.base_price = base_price

        # Simulated connection state
        self.connected = False
        self.subscribed = False
        self.message_count = 0

    async def connect(self) -> bool:
        """Pretend to connect after a short delay; always succeeds."""
        print(f"🔌 [{self.exchange_name.upper()}] Connecting...")
        await asyncio.sleep(0.2)  # Simulate connection delay
        self.connected = True
        print(f"✅ [{self.exchange_name.upper()}] Connected successfully")
        return True

    async def disconnect(self) -> None:
        """Pretend to disconnect and clear the connected/subscribed flags."""
        print(f"🔌 [{self.exchange_name.upper()}] Disconnecting...")
        await asyncio.sleep(0.1)
        self.connected = False
        self.subscribed = False
        print(f"❌ [{self.exchange_name.upper()}] Disconnected")

    async def subscribe_to_data(self, symbols: list, data_types: list) -> bool:
        """Pretend to subscribe; returns False when not connected."""
        if not self.connected:
            return False

        print(f"📡 [{self.exchange_name.upper()}] Subscribing to {len(symbols)} symbols")
        await asyncio.sleep(0.1)
        self.subscribed = True
        return True

    async def unsubscribe_from_data(self, symbols: list, data_types: list) -> bool:
        """Pretend to unsubscribe; always reports success."""
        print(f"📡 [{self.exchange_name.upper()}] Unsubscribing from data streams")
        self.subscribed = False
        return True

    async def _process_message(self, message: Any) -> MarketDataPoint:
        """Build a ticker data point whose price moves around ``base_price``."""
        self.message_count += 1

        # Offset cycles through -500 .. +490 in steps of 10 as messages arrive.
        price_variation = (self.message_count % 100 - 50) * 10
        current_price = self.base_price + price_variation

        return MarketDataPoint(
            exchange=self.exchange_name,
            symbol=message['symbol'],
            timestamp=datetime.now(timezone.utc),
            data_type=DataType.TICKER,
            data={
                'price': current_price,
                'volume': message.get('volume', 1.0 + (self.message_count % 10) * 0.1),
                'bid': current_price - 0.5,
                'ask': current_price + 0.5,
                'timestamp': datetime.now(timezone.utc).isoformat()
            }
        )

    async def _handle_messages(self) -> None:
        """Fabricate and process one message per symbol, then wait ``message_interval``."""
        if not (self.connected and self.subscribed):
            await asyncio.sleep(0.1)
            return

        for symbol in self.symbols:
            try:
                fake_message = {
                    'symbol': symbol,
                    'volume': 1.5 + (self.message_count % 5) * 0.2
                }

                point = await self._process_message(fake_message)
                if point:
                    self._stats['messages_processed'] += 1
                    await self._notify_callbacks(point)

            except Exception as exc:
                # Log the failure, then let it propagate to the caller.
                self.logger.error(f"Error processing message for {symbol}: {exc}")
                raise

        # Wait before next batch of messages
        await asyncio.sleep(self.message_interval)
def create_data_callback(exchange_name: str):
    """Build a callback that prints one aligned table row per data point, labelled with *exchange_name*."""

    def data_callback(data_point: MarketDataPoint):
        row = (f"📊 {exchange_name.upper():8} | {data_point.symbol:10} | "
               f"${data_point.data.get('price', 0):8.2f} | "
               f"Vol: {data_point.data.get('volume', 0):.2f} | "
               f"{data_point.timestamp.strftime('%H:%M:%S')}")
        print(row)

    return data_callback
async def demo_parallel_collectors():
    """
    Demonstrate running multiple collectors in parallel.

    Registers four simulated exchange collectors with one manager, starts
    them, prints periodic status updates for 30 seconds, prints a final
    per-collector report and stops the manager.
    """
    print("=" * 80)
    print("🚀 PARALLEL COLLECTORS DEMONSTRATION")
    print("=" * 80)
    print("Running multiple exchange collectors simultaneously...")
    print()

    # Create manager
    manager = CollectorManager(
        "parallel_demo_manager",
        global_health_check_interval=10.0  # Check every 10 seconds
    )

    # Define exchange configurations
    exchange_configs = [
        {
            'name': 'okx',
            'symbols': ['BTC-USDT', 'ETH-USDT'],
            'interval': 1.0,
            'base_price': 45000
        },
        {
            'name': 'binance',
            'symbols': ['BTC-USDT', 'ETH-USDT', 'SOL-USDT'],
            'interval': 1.5,
            'base_price': 45100
        },
        {
            'name': 'coinbase',
            'symbols': ['BTC-USD', 'ETH-USD'],
            'interval': 2.0,
            'base_price': 44900
        },
        {
            'name': 'kraken',
            'symbols': ['XBTUSD', 'ETHUSD'],
            'interval': 1.2,
            'base_price': 45050
        }
    ]

    # Create and configure collectors
    for config in exchange_configs:
        # Create collector
        collector = DemoExchangeCollector(
            exchange_name=config['name'],
            symbols=config['symbols'],
            message_interval=config['interval'],
            base_price=config['base_price']
        )

        # Add data callback
        callback = create_data_callback(config['name'])
        collector.add_data_callback(DataType.TICKER, callback)

        # Add to manager with configuration
        collector_config = CollectorConfig(
            name=f"{config['name']}_collector",
            exchange=config['name'],
            symbols=config['symbols'],
            data_types=['ticker'],
            auto_restart=True,
            health_check_interval=15.0,
            enabled=True
        )

        manager.add_collector(collector, collector_config)
        print(f" Added {config['name'].upper()} collector with {len(config['symbols'])} symbols")

    print(f"\n📝 Total collectors added: {len(manager.list_collectors())}")
    print()

    # Start all collectors in parallel
    print("🏁 Starting all collectors...")
    # Inside a coroutine the running loop is the one to ask for the clock.
    loop = asyncio.get_running_loop()
    start_time = loop.time()

    success = await manager.start()
    if not success:
        print("❌ Failed to start collector manager")
        return

    startup_time = loop.time() - start_time
    print(f"✅ All collectors started in {startup_time:.2f} seconds")
    print()

    print("📊 DATA STREAM (All exchanges running in parallel):")
    print("-" * 80)

    # Monitor for a period
    monitoring_duration = 30  # seconds

    for i in range(monitoring_duration):
        await asyncio.sleep(1)

        # Print status every 10 seconds
        if i % 10 == 0 and i > 0:
            status = manager.get_status()
            print()
            print(f"⏰ STATUS UPDATE ({i}s):")
            print(f" Running collectors: {len(manager.get_running_collectors())}")
            print(f" Failed collectors: {len(manager.get_failed_collectors())}")
            print(f" Total restarts: {status['statistics']['restarts_performed']}")
            print("-" * 80)

    # Final status report
    print()
    print("📈 FINAL STATUS REPORT:")
    print("=" * 80)

    status = manager.get_status()
    print(f"Manager Status: {status['manager_status']}")
    print(f"Total Collectors: {status['total_collectors']}")
    print(f"Running Collectors: {len(manager.get_running_collectors())}")
    print(f"Failed Collectors: {len(manager.get_failed_collectors())}")
    print(f"Total Restarts: {status['statistics']['restarts_performed']}")

    # Individual collector statistics
    print("\n📊 INDIVIDUAL COLLECTOR STATS:")
    for collector_name in manager.list_collectors():
        collector_status = manager.get_collector_status(collector_name)
        if collector_status:
            stats = collector_status['status']['statistics']
            health = collector_status['health']
            # Treat a missing or None uptime as zero so formatting cannot fail.
            uptime = stats.get('uptime_seconds') or 0

            print(f"\n{collector_name.upper()}:")
            print(f" Status: {collector_status['status']['status']}")
            print(f" Messages Processed: {stats['messages_processed']}")
            print(f" Uptime: {uptime:.1f}s")
            print(f" Errors: {stats['errors']}")
            print(f" Healthy: {health['is_healthy']}")

    # Stop all collectors
    print("\n🛑 Stopping all collectors...")
    await manager.stop()
    print("✅ All collectors stopped successfully")
async def demo_dynamic_management():
    """Add collectors to a running manager, then disable and re-enable one."""
    print("\n" + "=" * 80)
    print("🔄 DYNAMIC COLLECTOR MANAGEMENT")
    print("=" * 80)

    manager = CollectorManager("dynamic_manager")

    def register(exchange: str, symbol: str, interval: float) -> None:
        """Create a demo collector, attach its print callback and add it to the manager."""
        new_collector = DemoExchangeCollector(exchange, [symbol], interval)
        new_collector.add_data_callback(DataType.TICKER, create_data_callback(exchange))
        manager.add_collector(new_collector)

    # Start with one collector
    register("exchange_a", "BTC-USDT", 1.0)
    await manager.start()
    print("✅ Started with 1 collector")
    await asyncio.sleep(3)

    # Add second collector while system is running
    register("exchange_b", "ETH-USDT", 1.5)
    print(" Added second collector while running")
    await asyncio.sleep(3)

    # Add third collector
    register("exchange_c", "SOL-USDT", 2.0)
    print(" Added third collector")
    await asyncio.sleep(5)

    # Show current status
    print(f"\n📊 Current Status: {len(manager.get_running_collectors())} collectors running")

    # The second registered collector, if there is one, gets toggled.
    collectors = manager.list_collectors()
    has_second = len(collectors) > 1

    if has_second:
        manager.disable_collector(collectors[1])
        print(f"⏸️ Disabled collector: {collectors[1]}")
    await asyncio.sleep(3)

    if has_second:
        manager.enable_collector(collectors[1])
        print(f"▶️ Re-enabled collector: {collectors[1]}")
    await asyncio.sleep(3)

    print(f"\n📊 Final Status: {len(manager.get_running_collectors())} collectors running")

    await manager.stop()
    print("✅ Dynamic management demo complete")
async def demo_performance_monitoring():
    """
    Demonstrate performance monitoring across multiple collectors.

    Runs three simulated collectors with different message intervals and
    prints a per-collector snapshot (message rate, totals, errors, health)
    every five seconds, four times.
    """
    print("\n" + "=" * 80)
    print("📈 PERFORMANCE MONITORING")
    print("=" * 80)

    manager = CollectorManager("performance_monitor", global_health_check_interval=5.0)

    # Create collectors with different performance characteristics
    configs = [
        ("fast_exchange", ["BTC-USDT"], 0.5),    # Fast updates
        ("medium_exchange", ["ETH-USDT"], 1.0),  # Medium updates
        ("slow_exchange", ["SOL-USDT"], 2.0),    # Slow updates
    ]

    for exchange, symbols, interval in configs:
        collector = DemoExchangeCollector(exchange, symbols, interval)
        collector.add_data_callback(DataType.TICKER, create_data_callback(exchange))
        manager.add_collector(collector)

    await manager.start()
    print("✅ Started performance monitoring demo")

    # Monitor performance for 20 seconds
    for i in range(4):
        await asyncio.sleep(5)

        print(f"\n📊 PERFORMANCE SNAPSHOT ({(i+1)*5}s):")
        print("-" * 60)

        for collector_name in manager.list_collectors():
            status = manager.get_collector_status(collector_name)
            if status:
                stats = status['status']['statistics']
                health = status['health']

                # A missing or None uptime counts as zero; the divisor is
                # clamped to 1 second so early snapshots cannot divide by
                # a tiny or zero uptime.
                uptime = stats.get('uptime_seconds') or 0
                msg_rate = stats['messages_processed'] / max(uptime, 1)
                # Use the same success/failure markers as the rest of the demo.
                health_mark = '✅' if health['is_healthy'] else '❌'

                print(f"{collector_name:15} | "
                      f"Rate: {msg_rate:5.1f}/s | "
                      f"Total: {stats['messages_processed']:4d} | "
                      f"Errors: {stats['errors']:2d} | "
                      f"Health: {health_mark}")

    await manager.stop()
    print("\n✅ Performance monitoring demo complete")
async def main():
    """Run the parallel, dynamic-management and performance demos in order."""
    print("🎯 MULTIPLE COLLECTORS PARALLEL EXECUTION DEMO")
    print("This demonstration shows the CollectorManager running multiple collectors simultaneously\n")

    try:
        # Each demo must finish before the next one starts.
        for demo in (demo_parallel_collectors,
                     demo_dynamic_management,
                     demo_performance_monitoring):
            await demo()

        divider = "=" * 80
        print("\n" + divider)
        print("🎉 ALL PARALLEL EXECUTION DEMOS COMPLETED!")
        print(divider)

        print("\nKey takeaways:")
        for takeaway in (
            "✅ Multiple collectors run truly in parallel",
            "✅ Each collector operates independently",
            "✅ Collectors can be added/removed while system is running",
            "✅ Centralized health monitoring across all collectors",
            "✅ Individual performance tracking per collector",
            "✅ Coordinated lifecycle management",
        ):
            print(takeaway)

    except Exception as exc:
        print(f"❌ Demo failed with error: {exc}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(main())

View File

@ -69,3 +69,8 @@ python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
[dependency-groups]
dev = [
"pytest-asyncio>=1.0.0",
]

View File

@ -10,6 +10,9 @@
- `database/migrations/` - Alembic migration system for database schema versioning and updates
- `database/init/init.sql` - Docker initialization script for automatic database setup
- `database/init/schema_clean.sql` - Copy of clean schema for Docker initialization
- `data/base_collector.py` - Abstract base class for all data collectors with standardized interface, error handling, data validation, health monitoring, and auto-restart capabilities
- `data/collector_manager.py` - Centralized collector management with health monitoring, auto-recovery, and coordinated lifecycle management
- `data/__init__.py` - Data collection package initialization
- `data/okx_collector.py` - OKX API integration for real-time market data collection
- `data/aggregator.py` - OHLCV candle aggregation and processing
- `strategies/base_strategy.py` - Base strategy class and interface
@ -31,6 +34,8 @@
- `tests/test_strategies.py` - Unit tests for strategy implementations
- `tests/test_bot_manager.py` - Unit tests for bot management functionality
- `tests/test_data_collection.py` - Unit tests for data collection and aggregation
- `tests/test_base_collector.py` - Comprehensive unit tests for the BaseDataCollector abstract class (13 tests)
- `tests/test_collector_manager.py` - Comprehensive unit tests for the CollectorManager with health monitoring (14 tests)
- `tests/test_logging_enhanced.py` - Comprehensive unit tests for enhanced logging features (16 tests)
- `docs/setup.md` - Comprehensive setup guide for new machines and environments
- `docs/logging.md` - Complete documentation for the enhanced unified logging system
@ -49,6 +54,9 @@
- [x] 1.9 Add unified logging system we can use for all components
- [ ] 2.0 Market Data Collection and Processing System
- [x] 2.0.1 Create abstract base class for data collectors with standardized interface, error handling, and data validation
- [x] 2.0.2 Enhance data collectors with health monitoring, heartbeat system, and auto-restart capabilities
- [x] 2.0.3 Create collector manager for supervising multiple data collectors with coordinated lifecycle management
- [ ] 2.1 Implement OKX WebSocket API connector for real-time data
- [ ] 2.2 Create OHLCV candle aggregation logic with multiple timeframes (1m, 5m, 15m, 1h, 4h, 1d)
- [ ] 2.3 Build data validation and error handling for market data

View File

@ -0,0 +1,333 @@
"""
Unit tests for the BaseDataCollector abstract class.
"""
import asyncio
import pytest
from datetime import datetime, timezone
from decimal import Decimal
from unittest.mock import AsyncMock, MagicMock
from data.base_collector import (
BaseDataCollector, DataType, CollectorStatus, MarketDataPoint,
OHLCVData, DataValidationError, DataCollectorError
)
class TestDataCollector(BaseDataCollector):
    """In-memory ``BaseDataCollector`` implementation used as a test double.

    Connection and subscription state are tracked with plain boolean flags,
    and incoming traffic is simulated through a list of queued messages that
    ``_handle_messages`` consumes one item per call.
    """

    # The class name matches pytest's ``Test*`` discovery pattern but it
    # defines ``__init__``. Opting out explicitly avoids a
    # PytestCollectionWarning and makes clear this is a helper, not a test.
    __test__ = False

    def __init__(self, exchange_name: str, symbols: list, data_types=None):
        """Initialise the base collector and reset the simulated state."""
        super().__init__(exchange_name, symbols, data_types)
        self.connected = False
        self.subscribed = False
        self.messages = []  # pending raw messages, consumed first-in first-out

    async def connect(self) -> bool:
        """Simulate a connection that always succeeds after a short delay."""
        await asyncio.sleep(0.01)  # Simulate connection delay
        self.connected = True
        return True

    async def disconnect(self) -> None:
        """Simulate a disconnect; clears both the connected and subscribed flags."""
        await asyncio.sleep(0.01)  # Simulate disconnection delay
        self.connected = False
        self.subscribed = False

    async def subscribe_to_data(self, symbols: list, data_types: list) -> bool:
        """Set the subscribed flag; returns ``False`` when not connected."""
        if not self.connected:
            return False
        self.subscribed = True
        return True

    async def unsubscribe_from_data(self, symbols: list, data_types: list) -> bool:
        """Clear the subscribed flag and report success."""
        self.subscribed = False
        return True

    async def _process_message(self, message) -> MarketDataPoint:
        """Wrap a raw message dict in a ticker ``MarketDataPoint``.

        Increments the ``messages_received`` counter. The symbol falls back to
        ``'BTC-USDT'`` when the message has no ``symbol`` key, and the
        timestamp is the current UTC time.
        """
        self._stats['messages_received'] += 1
        return MarketDataPoint(
            exchange=self.exchange_name,
            symbol=message.get('symbol', 'BTC-USDT'),
            timestamp=datetime.now(timezone.utc),
            data_type=DataType.TICKER,
            data=message
        )

    async def _handle_messages(self) -> None:
        """Process at most one queued message, or idle briefly if none is queued."""
        # Simulate receiving messages
        if self.messages:
            message = self.messages.pop(0)
            data_point = await self._process_message(message)
            self._stats['messages_processed'] += 1
            self._stats['last_message_time'] = datetime.now(timezone.utc)
            await self._notify_callbacks(data_point)
        else:
            await asyncio.sleep(0.1)  # Wait for messages

    def add_test_message(self, message: dict):
        """Add a test message to be processed."""
        self.messages.append(message)
class TestBaseDataCollector:
    """Behavioural tests for the machinery provided by ``BaseDataCollector``."""

    @pytest.fixture
    def collector(self):
        """Provide a fresh ticker collector for two symbols on "okx"."""
        tracked_symbols = ["BTC-USDT", "ETH-USDT"]
        return TestDataCollector("okx", tracked_symbols, [DataType.TICKER])

    def test_initialization(self, collector):
        """A new collector exposes its configuration and starts out stopped."""
        assert collector.exchange_name == "okx"
        assert collector.symbols == {"BTC-USDT", "ETH-USDT"}
        assert collector.data_types == [DataType.TICKER]
        assert collector.status == CollectorStatus.STOPPED
        assert not collector._running

    @pytest.mark.asyncio
    async def test_start_stop_cycle(self, collector):
        """Starting connects and subscribes; stopping undoes both."""
        # Start phase
        assert await collector.start()
        assert collector.status == CollectorStatus.RUNNING
        assert collector.connected
        assert collector.subscribed
        assert collector._running

        # Give the message loop a moment to spin up
        await asyncio.sleep(0.1)

        # Stop phase
        await collector.stop()
        assert collector.status == CollectorStatus.STOPPED
        assert not collector._running
        assert not collector.connected
        assert not collector.subscribed

    @pytest.mark.asyncio
    async def test_message_processing(self, collector):
        """A queued message reaches the registered callback exactly once."""
        delivered = []

        def record(data_point: MarketDataPoint):
            delivered.append(data_point)

        collector.add_data_callback(DataType.TICKER, record)
        await collector.start()

        # Queue one message, then leave time for the loop to pick it up
        collector.add_test_message({"symbol": "BTC-USDT", "price": "50000"})
        await asyncio.sleep(0.2)
        await collector.stop()

        # Exactly one data point must have come through
        assert len(delivered) == 1
        first_point = delivered[0]
        assert first_point.symbol == "BTC-USDT"
        assert first_point.data_type == DataType.TICKER
        assert collector._stats['messages_received'] == 1
        assert collector._stats['messages_processed'] == 1

    def test_symbol_management(self, collector):
        """Symbols can be added and removed without creating duplicates."""
        baseline = len(collector.symbols)

        # A new symbol grows the set by one
        collector.add_symbol("LTC-USDT")
        assert "LTC-USDT" in collector.symbols
        assert len(collector.symbols) == baseline + 1

        # Removing a symbol shrinks it back
        collector.remove_symbol("BTC-USDT")
        assert "BTC-USDT" not in collector.symbols
        assert len(collector.symbols) == baseline

        # Re-adding a symbol that is already tracked changes nothing
        collector.add_symbol("ETH-USDT")
        assert len(collector.symbols) == baseline

    def test_callback_management(self, collector):
        """Callbacks can be registered and removed per data type."""
        def first_cb(data):
            pass

        def second_cb(data):
            pass

        def registered():
            # Look the list up on every call so a replaced list is noticed.
            return collector._data_callbacks[DataType.TICKER]

        # Register both callbacks
        collector.add_data_callback(DataType.TICKER, first_cb)
        collector.add_data_callback(DataType.TICKER, second_cb)
        assert len(registered()) == 2

        # Drop the first one; only the second must remain
        collector.remove_data_callback(DataType.TICKER, first_cb)
        assert len(registered()) == 1
        assert second_cb in registered()

    def test_get_status(self, collector):
        """The status report reflects configuration and zeroed statistics."""
        report = collector.get_status()

        assert report['exchange'] == 'okx'
        assert report['status'] == 'stopped'
        assert set(report['symbols']) == {"BTC-USDT", "ETH-USDT"}
        assert report['data_types'] == ['ticker']
        assert 'statistics' in report
        assert report['statistics']['messages_received'] == 0
class TestOHLCVData:
    """Tests for construction and validation of ``OHLCVData``."""

    def test_valid_ohlcv_data(self):
        """A consistent candle is accepted and keeps its field values."""
        candle_fields = dict(
            symbol="BTC-USDT",
            timeframe="1m",
            timestamp=datetime.now(timezone.utc),
            open=Decimal("50000"),
            high=Decimal("50100"),
            low=Decimal("49900"),
            close=Decimal("50050"),
            volume=Decimal("1.5"),
            trades_count=100,
        )
        candle = OHLCVData(**candle_fields)

        assert candle.symbol == "BTC-USDT"
        assert candle.timeframe == "1m"
        assert isinstance(candle.open, Decimal)
        assert candle.trades_count == 100

    def test_invalid_ohlcv_relationships(self):
        """A high below the open price is rejected with DataValidationError."""
        inconsistent_fields = dict(
            symbol="BTC-USDT",
            timeframe="1m",
            timestamp=datetime.now(timezone.utc),
            open=Decimal("50000"),
            high=Decimal("49000"),  # High is less than open
            low=Decimal("49900"),
            close=Decimal("50050"),
            volume=Decimal("1.5"),
        )
        with pytest.raises(DataValidationError):
            OHLCVData(**inconsistent_fields)

    def test_ohlcv_decimal_conversion(self):
        """Float and int inputs end up stored as ``Decimal``."""
        candle = OHLCVData(
            symbol="BTC-USDT",
            timeframe="1m",
            timestamp=datetime.now(timezone.utc),
            open=50000.0,   # float
            high=50100,     # int
            low=49900,      # int (changed from string to test proper conversion)
            close=50050.0,  # float
            volume=1.5,     # float
        )

        for field_name in ("open", "high", "low", "close", "volume"):
            assert isinstance(getattr(candle, field_name), Decimal)
class TestDataValidation:
    """Tests for ``validate_ohlcv_data`` on raw dictionary payloads."""

    @staticmethod
    def _complete_payload() -> dict:
        """Return a raw candle payload containing every required field."""
        return {
            "timestamp": 1609459200000,  # Unix timestamp in ms
            "open": "50000",
            "high": "50100",
            "low": "49900",
            "close": "50050",
            "volume": "1.5",
        }

    def test_validate_ohlcv_data_success(self):
        """A complete payload is converted into an ``OHLCVData`` instance."""
        collector = TestDataCollector("test", ["BTC-USDT"])
        payload = self._complete_payload()
        payload["trades_count"] = 100

        candle = collector.validate_ohlcv_data(payload, "BTC-USDT", "1m")

        assert candle.symbol == "BTC-USDT"
        assert candle.timeframe == "1m"
        assert candle.trades_count == 100
        assert isinstance(candle.open, Decimal)

    def test_validate_ohlcv_data_missing_field(self):
        """Leaving out a required field raises a descriptive validation error."""
        collector = TestDataCollector("test", ["BTC-USDT"])
        payload = self._complete_payload()
        del payload["low"]  # Missing 'low' field

        with pytest.raises(DataValidationError, match="Missing required field: low"):
            collector.validate_ohlcv_data(payload, "BTC-USDT", "1m")

    def test_validate_ohlcv_data_invalid_timestamp(self):
        """A non-numeric timestamp is rejected with DataValidationError."""
        collector = TestDataCollector("test", ["BTC-USDT"])
        payload = self._complete_payload()
        payload["timestamp"] = "invalid_timestamp"

        with pytest.raises(DataValidationError):
            collector.validate_ohlcv_data(payload, "BTC-USDT", "1m")
@pytest.mark.asyncio
async def test_connection_error_handling():
    """A failed start ends in ERROR status; a later start can still succeed."""

    class FailingCollector(TestDataCollector):
        """Collector whose ``connect`` can be made to fail on early attempts."""

        def __init__(self):
            super().__init__("test", ["BTC-USDT"])
            self.connect_attempts = 0
            self.should_fail = True

        async def connect(self) -> bool:
            self.connect_attempts += 1
            refuse = self.should_fail and self.connect_attempts < 3
            if refuse:
                return False  # Fail first 2 attempts
            return await super().connect()

    collector = FailingCollector()

    # The first start is expected to fail and leave the collector in ERROR
    assert not await collector.start()
    assert collector.status == CollectorStatus.ERROR

    # Reset the retry bookkeeping and let the next connection go through
    collector._reconnect_attempts = 0
    collector.status = CollectorStatus.STOPPED
    collector.connect_attempts = 0  # Reset connection attempts
    collector.should_fail = False  # Allow connection to succeed

    # With failures disabled the collector must come up normally
    assert await collector.start()
    assert collector.status == CollectorStatus.RUNNING

    await collector.stop()
# Allow running this test module directly. pytest.main() returns the exit
# status, so propagate it: otherwise a direct run would always exit with 0,
# even when tests fail.
if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__, "-v"]))

View File

@ -0,0 +1,341 @@
"""
Unit tests for the CollectorManager class.
"""
import asyncio
import pytest
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock
from data.collector_manager import CollectorManager, ManagerStatus, CollectorConfig
from data.base_collector import BaseDataCollector, DataType, CollectorStatus
class MockDataCollector(BaseDataCollector):
    """Configurable ``BaseDataCollector`` stand-in for manager tests.

    The ``should_fail_connect`` and ``should_fail_subscribe`` switches let a
    test force connection or subscription failures; a forced connection
    failure happens at most twice, tracked by ``fail_count``.
    """

    def __init__(self, exchange_name: str, symbols: list, auto_restart: bool = True):
        super().__init__(exchange_name, symbols, [DataType.TICKER], auto_restart=auto_restart)
        # Simulated transport state
        self.connected = False
        self.subscribed = False
        # Failure-injection switches
        self.should_fail_connect = False
        self.should_fail_subscribe = False
        self.fail_count = 0

    async def connect(self) -> bool:
        """Connect after a short delay, unless a forced failure is still due."""
        forced_failure = self.should_fail_connect and self.fail_count < 2
        if forced_failure:
            self.fail_count += 1
            return False

        await asyncio.sleep(0.01)
        self.connected = True
        return True

    async def disconnect(self) -> None:
        """Disconnect after a short delay and clear both state flags."""
        await asyncio.sleep(0.01)
        self.connected = False
        self.subscribed = False

    async def subscribe_to_data(self, symbols: list, data_types: list) -> bool:
        """Subscribe unless failure is forced or the collector is not connected."""
        if self.should_fail_subscribe or not self.connected:
            return False

        self.subscribed = True
        return True

    async def unsubscribe_from_data(self, symbols: list, data_types: list) -> bool:
        """Clear the subscribed flag and report success."""
        self.subscribed = False
        return True

    async def _process_message(self, message) -> None:
        """Do nothing: the mock never processes messages."""
        return None

    async def _handle_messages(self) -> None:
        """Stand in for message handling by sleeping briefly."""
        # Simulate light processing
        await asyncio.sleep(0.1)
class TestCollectorManager:
    """Test cases for CollectorManager.

    Async tests whose final step is ``manager.stop()`` run that step in a
    ``finally`` clause, so a failing assertion does not leave the manager
    running after the test ends.
    """

    @pytest.fixture
    def manager(self):
        """Create a test manager instance with a 1-second global health check."""
        return CollectorManager("test_manager", global_health_check_interval=1.0)

    @pytest.fixture
    def mock_collector(self):
        """Create a mock collector for two symbols on "okx"."""
        return MockDataCollector("okx", ["BTC-USDT", "ETH-USDT"])

    def test_initialization(self, manager):
        """Test manager initialization."""
        assert manager.manager_name == "test_manager"
        assert manager.status == ManagerStatus.STOPPED
        assert len(manager._collectors) == 0
        assert len(manager._enabled_collectors) == 0

    def test_add_collector(self, manager, mock_collector):
        """Test adding a collector to the manager."""
        # Add collector
        manager.add_collector(mock_collector)
        assert len(manager._collectors) == 1
        assert len(manager._enabled_collectors) == 1

        # Verify collector is in the collections
        collector_names = manager.list_collectors()
        assert len(collector_names) == 1
        assert collector_names[0].startswith("okx_")

        # Test with custom config using a different collector instance
        mock_collector2 = MockDataCollector("binance", ["ETH-USDT"])
        config = CollectorConfig(
            name="custom_collector",
            exchange="binance",
            symbols=["ETH-USDT"],
            data_types=["ticker"],
            enabled=False
        )
        manager.add_collector(mock_collector2, config)
        assert len(manager._collectors) == 2
        assert len(manager._enabled_collectors) == 1  # Still 1 since second is disabled

    def test_remove_collector(self, manager, mock_collector):
        """Test removing a collector from the manager."""
        # Add then remove
        manager.add_collector(mock_collector)
        collector_name = manager.list_collectors()[0]

        success = manager.remove_collector(collector_name)
        assert success
        assert len(manager._collectors) == 0
        assert len(manager._enabled_collectors) == 0

        # Test removing non-existent collector
        success = manager.remove_collector("non_existent")
        assert not success

    def test_enable_disable_collector(self, manager, mock_collector):
        """Test enabling and disabling collectors."""
        manager.add_collector(mock_collector)
        collector_name = manager.list_collectors()[0]

        # Initially enabled
        assert collector_name in manager._enabled_collectors

        # Disable
        success = manager.disable_collector(collector_name)
        assert success
        assert collector_name not in manager._enabled_collectors

        # Enable again
        success = manager.enable_collector(collector_name)
        assert success
        assert collector_name in manager._enabled_collectors

        # Test with non-existent collector
        success = manager.enable_collector("non_existent")
        assert not success

    @pytest.mark.asyncio
    async def test_start_stop_manager(self, manager, mock_collector):
        """Test starting and stopping the manager."""
        # Add a collector
        manager.add_collector(mock_collector)

        # Start manager
        success = await manager.start()
        assert success
        assert manager.status == ManagerStatus.RUNNING

        # Wait a bit for collectors to start
        await asyncio.sleep(0.2)

        # Check collector is running
        running_collectors = manager.get_running_collectors()
        assert len(running_collectors) == 1

        # Stop manager (stop() is itself under test here, so it is not moved
        # into a finally clause)
        await manager.stop()
        assert manager.status == ManagerStatus.STOPPED

        # Check collector is stopped
        running_collectors = manager.get_running_collectors()
        assert len(running_collectors) == 0

    @pytest.mark.asyncio
    async def test_restart_collector(self, manager, mock_collector):
        """Test restarting a specific collector."""
        manager.add_collector(mock_collector)
        await manager.start()
        try:
            collector_name = manager.list_collectors()[0]

            # Wait for collector to start
            await asyncio.sleep(0.2)

            # Restart the collector
            success = await manager.restart_collector(collector_name)
            assert success

            # Check statistics
            status = manager.get_status()
            assert status['statistics']['restarts_performed'] >= 1
        finally:
            # Always shut the manager down, even when an assertion fails.
            await manager.stop()

    @pytest.mark.asyncio
    async def test_health_monitoring(self, manager):
        """Test health monitoring and auto-restart functionality."""
        # Create a collector that will fail initially
        failing_collector = MockDataCollector("test", ["BTC-USDT"], auto_restart=True)
        failing_collector.should_fail_connect = True
        manager.add_collector(failing_collector)

        await manager.start()
        try:
            # Wait for health checks
            await asyncio.sleep(2.5)  # More than health check interval

            # Smoke check: status reporting must not raise at this point.
            manager.get_status()
            failed_collectors = manager.get_failed_collectors()

            # The collector may still be failed or may already have recovered,
            # so 0 and 1 are both acceptable; with a single registered
            # collector the count can never exceed 1.
            assert len(failed_collectors) <= 1
        finally:
            # Always shut the manager down, even when an assertion fails.
            await manager.stop()

    def test_get_status(self, manager, mock_collector):
        """Test status reporting."""
        manager.add_collector(mock_collector)

        status = manager.get_status()
        assert status['manager_status'] == 'stopped'
        assert status['total_collectors'] == 1
        assert len(status['enabled_collectors']) == 1
        assert 'statistics' in status
        assert 'collectors' in status

    def test_get_collector_status(self, manager, mock_collector):
        """Test getting individual collector status."""
        manager.add_collector(mock_collector)
        collector_name = manager.list_collectors()[0]

        collector_status = manager.get_collector_status(collector_name)
        assert collector_status is not None
        assert collector_status['name'] == collector_name
        assert 'config' in collector_status
        assert 'status' in collector_status
        assert 'health' in collector_status

        # Test non-existent collector
        non_existent_status = manager.get_collector_status("non_existent")
        assert non_existent_status is None

    @pytest.mark.asyncio
    async def test_restart_all_collectors(self, manager):
        """Test restarting all collectors."""
        # Add multiple collectors
        collector1 = MockDataCollector("okx", ["BTC-USDT"])
        collector2 = MockDataCollector("binance", ["ETH-USDT"])
        manager.add_collector(collector1)
        manager.add_collector(collector2)

        await manager.start()
        try:
            await asyncio.sleep(0.2)  # Let them start

            # Restart all
            results = await manager.restart_all_collectors()
            assert len(results) == 2
            assert all(results.values())
        finally:
            # Always shut the manager down, even when an assertion fails.
            await manager.stop()

    def test_get_running_and_failed_collectors(self, manager, mock_collector):
        """Test getting running and failed collector lists."""
        manager.add_collector(mock_collector)

        # Initially no running collectors
        running = manager.get_running_collectors()
        failed = manager.get_failed_collectors()
        assert len(running) == 0
        # Note: failed might be empty since collector hasn't started yet;
        # with one registered collector it can hold at most one entry.
        assert len(failed) <= 1

    def test_collector_config(self):
        """Test CollectorConfig dataclass."""
        config = CollectorConfig(
            name="test_collector",
            exchange="okx",
            symbols=["BTC-USDT", "ETH-USDT"],
            data_types=["ticker", "trade"],
            auto_restart=True,
            health_check_interval=30.0,
            enabled=True
        )
        assert config.name == "test_collector"
        assert config.exchange == "okx"
        assert len(config.symbols) == 2
        assert len(config.data_types) == 2
        assert config.auto_restart is True
        assert config.enabled is True
@pytest.mark.asyncio
async def test_manager_with_connection_failures():
    """Test manager handling collectors with connection failures.

    The manager itself must start successfully even though its only collector
    is configured to refuse connections at first. ``manager.stop()`` runs in a
    ``finally`` clause so the manager is shut down even when an assertion
    fails.
    """
    manager = CollectorManager("test_manager", global_health_check_interval=0.5)

    # Create a collector that fails connection initially
    failing_collector = MockDataCollector("failing_exchange", ["BTC-USDT"])
    failing_collector.should_fail_connect = True
    manager.add_collector(failing_collector)

    # Start manager
    success = await manager.start()
    try:
        assert success  # Manager should start even if collectors fail

        # Wait for some health checks
        await asyncio.sleep(1.5)

        failed_collectors = manager.get_failed_collectors()
        status = manager.get_status()

        # The collector may still be failed or may already have recovered;
        # with a single registered collector the count can never exceed 1.
        assert len(failed_collectors) <= 1
        # The restart counter must be reported and can never be negative.
        assert status['statistics']['restarts_performed'] >= 0
    finally:
        await manager.stop()
@pytest.mark.asyncio
async def test_manager_graceful_shutdown():
    """Stopping a manager that supervises several collectors ends in STOPPED."""
    manager = CollectorManager("test_manager")

    # Register three independent mock collectors
    for index in range(3):
        manager.add_collector(MockDataCollector(f"exchange_{index}", ["BTC-USDT"]))

    await manager.start()
    await asyncio.sleep(0.2)

    # Shutdown has to finish even if individual collectors need some time
    await manager.stop()
    assert manager.status == ManagerStatus.STOPPED
# Allow running this test module directly. pytest.main() returns the exit
# status, so propagate it: otherwise a direct run would always exit with 0,
# even when tests fail.
if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__, "-v"]))

8
uv.lock generated
View File

@ -428,6 +428,11 @@ dev = [
{ name = "pytest-mock" },
]
[package.dev-dependencies]
dev = [
{ name = "pytest-asyncio" },
]
[package.metadata]
requires-dist = [
{ name = "aiohttp", specifier = ">=3.8.0" },
@ -462,6 +467,9 @@ requires-dist = [
]
provides-extras = ["dev"]
[package.metadata.requires-dev]
dev = [{ name = "pytest-asyncio", specifier = ">=1.0.0" }]
[[package]]
name = "distlib"
version = "0.3.9"