Implement Service Configuration Manager for data collection service
- Introduced `service_config.py` to manage configuration loading, validation, and schema management, enhancing modularity and security. - Created a `ServiceConfig` class for handling configuration with robust error handling and default values. - Refactored `DataCollectionService` to utilize the new `ServiceConfig`, streamlining configuration management and improving readability. - Added a `CollectorFactory` to encapsulate collector creation logic, promoting separation of concerns. - Updated `CollectorManager` and related components to align with the new architecture, ensuring better maintainability. - Enhanced logging practices across the service for improved monitoring and debugging. These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity and performance.
This commit is contained in:
18
data/manager_components/__init__.py
Normal file
18
data/manager_components/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
Manager components package for collector management.
|
||||
|
||||
This package contains specialized components that handle different aspects
|
||||
of collector management following the Single Responsibility Principle.
|
||||
"""
|
||||
|
||||
from .collector_lifecycle_manager import CollectorLifecycleManager
|
||||
from .manager_health_monitor import ManagerHealthMonitor
|
||||
from .manager_stats_tracker import ManagerStatsTracker
|
||||
from .manager_logger import ManagerLogger
|
||||
|
||||
__all__ = [
|
||||
'CollectorLifecycleManager',
|
||||
'ManagerHealthMonitor',
|
||||
'ManagerStatsTracker',
|
||||
'ManagerLogger'
|
||||
]
|
||||
342
data/manager_components/collector_lifecycle_manager.py
Normal file
342
data/manager_components/collector_lifecycle_manager.py
Normal file
@@ -0,0 +1,342 @@
|
||||
"""
|
||||
Collector Lifecycle Manager for handling collector lifecycle operations.
|
||||
|
||||
This module handles the lifecycle of data collectors including adding, removing,
|
||||
enabling, disabling, starting, and restarting collectors.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Dict, Set, Optional
|
||||
from ..base_collector import BaseDataCollector, CollectorStatus
|
||||
from ..collector_types import CollectorConfig
|
||||
|
||||
|
||||
class CollectorLifecycleManager:
    """Manages the lifecycle of data collectors.

    Owns the collector registry (instances, configs, enabled-name set) and
    implements add/remove/enable/disable plus start/stop/restart operations.
    Async operations that are triggered from synchronous methods are scheduled
    with ``asyncio.create_task`` and fall back to a log message when no event
    loop is running.
    """

    def __init__(self, logger_manager=None):
        """
        Initialize the lifecycle manager.

        Args:
            logger_manager: Logger manager instance for logging operations
                (all logging below is guarded, so ``None`` disables logging)
        """
        self.logger_manager = logger_manager

        # Collector storage: instance registry, per-collector config,
        # and the set of names that should be running.
        self._collectors: Dict[str, BaseDataCollector] = {}
        self._collector_configs: Dict[str, CollectorConfig] = {}
        self._enabled_collectors: Set[str] = set()

        # Manager state
        self._running = False
        self._stats = {'total_collectors': 0, 'restarts_performed': 0}

    def set_running_state(self, running: bool) -> None:
        """Set the running state of the manager."""
        # enable_collector() consults this to decide whether to start
        # a newly enabled collector immediately.
        self._running = running

    def get_stats(self) -> Dict:
        """Get lifecycle statistics (shallow copy; safe for callers to mutate)."""
        return self._stats.copy()

    def add_collector(self,
                      collector: BaseDataCollector,
                      config: Optional[CollectorConfig] = None) -> None:
        """
        Add a collector to be managed.

        Args:
            collector: Data collector instance
            config: Optional configuration (will create default if not provided)
        """
        # Use a more unique name to avoid duplicates: exchange name plus a
        # microsecond-derived suffix (6 digits), so repeated adds of the same
        # exchange get distinct registry keys.
        collector_name = f"{collector.exchange_name}_{int(time.time() * 1000000) % 1000000}"

        # Ensure unique name — append a counter if the time-based suffix collides.
        counter = 1
        base_name = collector_name
        while collector_name in self._collectors:
            collector_name = f"{base_name}_{counter}"
            counter += 1

        if config is None:
            # Derive a default config from the collector's own attributes.
            # NOTE(review): reaches into the private `_state_telemetry`
            # attribute for the health-check interval — confirm a public
            # accessor is not available on BaseDataCollector.
            config = CollectorConfig(
                name=collector_name,
                exchange=collector.exchange_name,
                symbols=list(collector.symbols),
                data_types=[dt.value for dt in collector.data_types],
                auto_restart=collector.auto_restart,
                health_check_interval=collector._state_telemetry.health_check_interval
            )

        self._collectors[collector_name] = collector
        self._collector_configs[collector_name] = config

        if config.enabled:
            self._enabled_collectors.add(collector_name)

        self._stats['total_collectors'] = len(self._collectors)

        if self.logger_manager:
            self.logger_manager.log_info(
                f"Added collector: {collector_name} ({collector.exchange_name}) - "
                f"Symbols: {', '.join(collector.symbols)} - Enabled: {config.enabled}"
            )

    def remove_collector(self, collector_name: str) -> bool:
        """
        Remove a collector from management.

        Args:
            collector_name: Name of the collector to remove

        Returns:
            True if removed successfully, False if not found
        """
        if collector_name not in self._collectors:
            if self.logger_manager:
                self.logger_manager.log_warning(f"Collector not found: {collector_name}")
            return False

        # Stop the collector first (only if event loop is running)
        collector = self._collectors[collector_name]
        if collector.status != CollectorStatus.STOPPED:
            try:
                # NOTE(review): the Task returned by create_task is not kept;
                # asyncio may garbage-collect it before the stop completes —
                # consider retaining a reference (see asyncio docs).
                asyncio.create_task(collector.stop(force=True))
            except RuntimeError:
                # No event loop running, just log
                if self.logger_manager:
                    self.logger_manager.log_info(
                        f"Collector {collector_name} will be removed without stopping (no event loop)"
                    )

        # Remove from management (registry, config, and enabled set).
        del self._collectors[collector_name]
        del self._collector_configs[collector_name]
        self._enabled_collectors.discard(collector_name)

        self._stats['total_collectors'] = len(self._collectors)

        if self.logger_manager:
            self.logger_manager.log_info(f"Removed collector: {collector_name}")
        return True

    def enable_collector(self, collector_name: str) -> bool:
        """
        Enable a collector (will be started if manager is running).

        Args:
            collector_name: Name of the collector to enable

        Returns:
            True if enabled successfully, False if not found
        """
        if collector_name not in self._collectors:
            if self.logger_manager:
                self.logger_manager.log_warning(f"Collector not found: {collector_name}")
            return False

        self._enabled_collectors.add(collector_name)
        self._collector_configs[collector_name].enabled = True

        # Start the collector if manager is running (only if event loop is running)
        if self._running:
            try:
                asyncio.create_task(self._start_collector(collector_name))
            except RuntimeError:
                # No event loop running, will be started when manager starts
                if self.logger_manager:
                    self.logger_manager.log_debug(
                        f"Collector {collector_name} enabled but will start when manager starts"
                    )

        if self.logger_manager:
            self.logger_manager.log_info(f"Enabled collector: {collector_name}")
        return True

    def disable_collector(self, collector_name: str) -> bool:
        """
        Disable a collector (will be stopped if running).

        Args:
            collector_name: Name of the collector to disable

        Returns:
            True if disabled successfully, False if not found
        """
        if collector_name not in self._collectors:
            if self.logger_manager:
                self.logger_manager.log_warning(f"Collector not found: {collector_name}")
            return False

        self._enabled_collectors.discard(collector_name)
        self._collector_configs[collector_name].enabled = False

        # Stop the collector (only if event loop is running)
        collector = self._collectors[collector_name]
        try:
            asyncio.create_task(collector.stop(force=True))
        except RuntimeError:
            # No event loop running, just log
            if self.logger_manager:
                self.logger_manager.log_debug(
                    f"Collector {collector_name} disabled but cannot stop (no event loop)"
                )

        if self.logger_manager:
            self.logger_manager.log_info(f"Disabled collector: {collector_name}")
        return True

    async def _start_collector(self, collector_name: str) -> bool:
        """
        Start a specific collector.

        Args:
            collector_name: Name of the collector to start

        Returns:
            True if started successfully, False otherwise
        """
        if collector_name not in self._collectors:
            if self.logger_manager:
                self.logger_manager.log_warning(f"Collector not found: {collector_name}")
            return False

        collector = self._collectors[collector_name]

        try:
            success = await collector.start()
            if success:
                if self.logger_manager:
                    self.logger_manager.log_info(f"Started collector: {collector_name}")
            else:
                if self.logger_manager:
                    self.logger_manager.log_error(f"Failed to start collector: {collector_name}")
            return success

        except Exception as e:
            # Broad catch: a single collector failing to start must not
            # propagate and break the manager's startup sequence.
            if self.logger_manager:
                self.logger_manager.log_error(f"Error starting collector {collector_name}: {e}", exc_info=True)
            return False

    async def restart_collector(self, collector_name: str) -> bool:
        """
        Restart a specific collector.

        Args:
            collector_name: Name of the collector to restart

        Returns:
            True if restarted successfully, False otherwise
        """
        if collector_name not in self._collectors:
            if self.logger_manager:
                self.logger_manager.log_warning(f"Collector not found: {collector_name}")
            return False

        collector = self._collectors[collector_name]
        if self.logger_manager:
            self.logger_manager.log_info(f"Restarting collector: {collector_name}")

        try:
            success = await collector.restart()
            if success:
                # Counted only on successful restarts.
                self._stats['restarts_performed'] += 1
                if self.logger_manager:
                    self.logger_manager.log_info(f"Successfully restarted collector: {collector_name}")
            else:
                if self.logger_manager:
                    self.logger_manager.log_error(f"Failed to restart collector: {collector_name}")
            return success

        except Exception as e:
            if self.logger_manager:
                self.logger_manager.log_error(f"Error restarting collector {collector_name}: {e}", exc_info=True)
            return False

    async def restart_all_collectors(self) -> Dict[str, bool]:
        """
        Restart all enabled collectors.

        Returns:
            Dictionary mapping collector names to restart success status
        """
        if self.logger_manager:
            self.logger_manager.log_info("Restarting all enabled collectors")

        results = {}
        restart_tasks = []

        # Kick off all restarts concurrently, then await them in turn.
        for collector_name in self._enabled_collectors:
            task = asyncio.create_task(self.restart_collector(collector_name))
            restart_tasks.append((collector_name, task))

        # Wait for all restarts to complete
        for collector_name, task in restart_tasks:
            try:
                results[collector_name] = await task
            except Exception as e:
                # restart_collector already catches its own errors, so this
                # guards against failures in the task machinery itself.
                if self.logger_manager:
                    self.logger_manager.log_error(f"Error restarting {collector_name}: {e}", exc_info=True)
                results[collector_name] = False

        successful_restarts = sum(1 for success in results.values() if success)
        if self.logger_manager:
            self.logger_manager.log_info(
                f"Restart complete - {successful_restarts}/{len(results)} collectors restarted successfully"
            )

        return results

    async def start_all_enabled_collectors(self) -> None:
        """Start all enabled collectors concurrently (30s overall timeout)."""
        start_tasks = []
        for collector_name in self._enabled_collectors:
            task = asyncio.create_task(self._start_collector(collector_name))
            start_tasks.append(task)

        # Wait for all collectors to start (with timeout)
        if start_tasks:
            try:
                # return_exceptions=True: one failing start must not cancel the rest.
                await asyncio.wait_for(asyncio.gather(*start_tasks, return_exceptions=True), timeout=30.0)
            except asyncio.TimeoutError:
                if self.logger_manager:
                    self.logger_manager.log_warning("Some collectors took too long to start")

    async def stop_all_collectors(self) -> None:
        """Stop all collectors (enabled or not) concurrently (30s overall timeout)."""
        stop_tasks = []
        for collector in self._collectors.values():
            task = asyncio.create_task(collector.stop(force=True))
            stop_tasks.append(task)

        # Wait for all collectors to stop (with timeout)
        if stop_tasks:
            try:
                await asyncio.wait_for(asyncio.gather(*stop_tasks, return_exceptions=True), timeout=30.0)
            except asyncio.TimeoutError:
                if self.logger_manager:
                    self.logger_manager.log_warning("Some collectors took too long to stop")

    # Getters for data access
    # NOTE(review): these return the live internal containers, not copies —
    # callers can mutate manager state through them. Confirm this is intended.
    def get_collectors(self) -> Dict[str, BaseDataCollector]:
        """Get all collectors."""
        return self._collectors

    def get_collector_configs(self) -> Dict[str, CollectorConfig]:
        """Get all collector configurations."""
        return self._collector_configs

    def get_enabled_collectors(self) -> Set[str]:
        """Get enabled collector names."""
        return self._enabled_collectors

    def get_collector(self, name: str) -> Optional[BaseDataCollector]:
        """Get a specific collector by name, or None if not registered."""
        return self._collectors.get(name)

    def get_collector_config(self, name: str) -> Optional[CollectorConfig]:
        """Get a specific collector config by name, or None if not registered."""
        return self._collector_configs.get(name)
|
||||
185
data/manager_components/manager_health_monitor.py
Normal file
185
data/manager_components/manager_health_monitor.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""
|
||||
Manager Health Monitor for monitoring collector health and auto-recovery.
|
||||
|
||||
This module handles health monitoring of data collectors including periodic health checks,
|
||||
auto-restart functionality, and health status tracking.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from datetime import datetime, timezone
|
||||
from typing import Set, Dict, Optional
|
||||
from ..base_collector import BaseDataCollector, CollectorStatus
|
||||
|
||||
|
||||
class ManagerHealthMonitor:
    """Monitors the health of data collectors and provides auto-recovery.

    Runs a periodic background task that queries each enabled collector's
    health status via the lifecycle manager, records running/failed counts,
    and schedules a restart for unhealthy collectors whose config allows
    auto-restart.
    """

    def __init__(self,
                 global_health_check_interval: float = 60.0,
                 logger_manager=None,
                 lifecycle_manager=None):
        """
        Initialize the health monitor.

        Args:
            global_health_check_interval: Seconds between global health checks
            logger_manager: Logger manager instance for logging operations
            lifecycle_manager: Lifecycle manager for restart operations
                (health checks are skipped entirely when this is None)
        """
        self.global_health_check_interval = global_health_check_interval
        self.logger_manager = logger_manager
        self.lifecycle_manager = lifecycle_manager

        # Health monitoring state
        self._running = False
        self._last_global_check = datetime.now(timezone.utc)
        self._global_health_task: Optional[asyncio.Task] = None

        # Health statistics (refreshed on each check)
        self._health_stats = {
            'last_global_check': None,
            'running_collectors': 0,
            'failed_collectors': 0
        }

    def set_running_state(self, running: bool) -> None:
        """Set the running state of the monitor."""
        # The monitor loop checks this flag; it must be True before
        # start_monitoring() for the loop to keep iterating.
        self._running = running

    def get_health_stats(self) -> Dict:
        """Get health monitoring statistics (shallow copy)."""
        return self._health_stats.copy()

    def get_last_global_check(self) -> datetime:
        """Get the timestamp of the last global health check."""
        return self._last_global_check

    async def start_monitoring(self) -> None:
        """Start the global health monitoring task (idempotent while running)."""
        if self._global_health_task and not self._global_health_task.done():
            if self.logger_manager:
                self.logger_manager.log_warning("Health monitoring is already running")
            return

        if self.logger_manager:
            self.logger_manager.log_debug("Starting health monitoring")

        self._global_health_task = asyncio.create_task(self._global_health_monitor())

    async def stop_monitoring(self) -> None:
        """Stop the global health monitoring task and wait for it to cancel."""
        if self._global_health_task and not self._global_health_task.done():
            self._global_health_task.cancel()
            try:
                await self._global_health_task
            except asyncio.CancelledError:
                # Expected: the task was cancelled above.
                pass

        if self.logger_manager:
            self.logger_manager.log_debug("Health monitoring stopped")

    async def _global_health_monitor(self) -> None:
        """Global health monitoring loop for all collectors.

        Sleeps for the configured interval between checks; exits when
        ``_running`` is cleared or the task is cancelled.
        """
        if self.logger_manager:
            self.logger_manager.log_debug("Starting global health monitor")

        while self._running:
            try:
                await asyncio.sleep(self.global_health_check_interval)

                self._last_global_check = datetime.now(timezone.utc)
                # NOTE(review): stored as a datetime object (not isoformat);
                # consumers of get_health_stats() must handle that.
                self._health_stats['last_global_check'] = self._last_global_check

                # Perform health check if lifecycle manager is available
                if self.lifecycle_manager:
                    await self._perform_health_check()

            except asyncio.CancelledError:
                if self.logger_manager:
                    self.logger_manager.log_debug("Global health monitor cancelled")
                break
            except Exception as e:
                if self.logger_manager:
                    self.logger_manager.log_error(f"Error in global health monitor: {e}", exc_info=True)
                # Back off a full interval after an unexpected failure so a
                # persistent error cannot spin the loop.
                await asyncio.sleep(self.global_health_check_interval)

    async def _perform_health_check(self) -> None:
        """Perform a health check on all enabled collectors.

        Updates running/failed counts and schedules auto-restarts for
        unhealthy collectors whose config has ``auto_restart`` set.
        """
        if not self.lifecycle_manager:
            return

        enabled_collectors = self.lifecycle_manager.get_enabled_collectors()
        collectors = self.lifecycle_manager.get_collectors()

        running_count = 0
        failed_count = 0

        for collector_name in enabled_collectors:
            # Enabled set and registry can briefly disagree during removal.
            if collector_name not in collectors:
                continue

            collector = collectors[collector_name]
            health_status = collector.get_health_status()

            if health_status['is_healthy'] and collector.status == CollectorStatus.RUNNING:
                running_count += 1
            elif not health_status['is_healthy']:
                failed_count += 1
                if self.logger_manager:
                    self.logger_manager.log_warning(
                        f"Collector {collector_name} is unhealthy: {health_status['issues']}"
                    )

                # Auto-restart if needed and not already restarting
                # (skip collectors that are mid start/stop transition).
                config = self.lifecycle_manager.get_collector_config(collector_name)
                if (config and config.auto_restart and
                        collector.status not in [CollectorStatus.STARTING, CollectorStatus.STOPPING]):

                    if self.logger_manager:
                        self.logger_manager.log_info(f"Auto-restarting unhealthy collector: {collector_name}")

                    # Create restart task without awaiting to avoid blocking
                    # NOTE(review): the Task reference is not retained; asyncio
                    # may garbage-collect it before the restart completes —
                    # confirm or keep a reference.
                    asyncio.create_task(self.lifecycle_manager.restart_collector(collector_name))

        # Update health statistics
        self._health_stats['running_collectors'] = running_count
        self._health_stats['failed_collectors'] = failed_count

        if self.logger_manager:
            self.logger_manager.log_debug(
                f"Health check complete - Running: {running_count}, Failed: {failed_count}"
            )

    async def perform_immediate_health_check(self) -> Dict[str, Dict]:
        """
        Perform an immediate health check on all collectors.

        Unlike the periodic check, this does not update internal stats or
        trigger auto-restarts; it only reports.

        Returns:
            Dictionary mapping collector names to their health status
        """
        if not self.lifecycle_manager:
            return {}

        enabled_collectors = self.lifecycle_manager.get_enabled_collectors()
        collectors = self.lifecycle_manager.get_collectors()

        health_results = {}

        for collector_name in enabled_collectors:
            if collector_name not in collectors:
                continue

            collector = collectors[collector_name]
            health_status = collector.get_health_status()

            health_results[collector_name] = {
                'is_healthy': health_status['is_healthy'],
                'status': collector.status.value,
                'issues': health_status.get('issues', [])
            }

        return health_results

    def get_health_task(self) -> Optional[asyncio.Task]:
        """Get the current health monitoring task (None before first start)."""
        return self._global_health_task
|
||||
136
data/manager_components/manager_logger.py
Normal file
136
data/manager_components/manager_logger.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Manager Logger for centralized logging operations.
|
||||
|
||||
This module provides a centralized logging interface for the collector management system
|
||||
with configurable log levels and error sanitization.
|
||||
"""
|
||||
|
||||
from typing import Optional, Any
|
||||
|
||||
|
||||
class ManagerLogger:
    """Centralized logger wrapper for collector management operations.

    Wraps an optional underlying logger, supports an errors-only mode that
    suppresses debug/info/warning output, and sanitizes error/critical
    messages so credential values are never written to the logs.
    """

    def __init__(self, logger=None, log_errors_only: bool = False):
        """
        Initialize the manager logger.

        Args:
            logger: Logger instance. If None, no logging will be performed.
            log_errors_only: If True and logger is provided, only log error-level messages
        """
        self.logger = logger
        self.log_errors_only = log_errors_only

    def _sanitize_error(self, message: str) -> str:
        """
        Sanitize an error message to prevent leaking internal details.

        Every occurrence of a sensitive ``key=`` pattern has its value
        replaced with ``[REDACTED]``. Matching is case-insensitive and all
        occurrences are redacted. (The previous implementation checked
        case-insensitively but split case-sensitively — so ``Password=...``
        was never redacted — and dropped everything after a second
        occurrence of the same pattern.)

        Args:
            message: Original error message

        Returns:
            Sanitized error message
        """
        # Key prefixes whose values must never appear in log output.
        sensitive_patterns = [
            'password=',
            'token=',
            'key=',
            'secret=',
            'auth=',
            'api_key=',
            'api_secret=',
            'access_token=',
            'refresh_token='
        ]
        # Characters that terminate a value embedded in a larger message.
        end_chars = ' ,)]}\n\t'

        sanitized = message
        for pattern in sensitive_patterns:
            search_from = 0
            while True:
                # Case-insensitive search; original casing is preserved in
                # the output because we slice the real string, not a lowered
                # copy.
                idx = sanitized.lower().find(pattern, search_from)
                if idx == -1:
                    break
                value_start = idx + len(pattern)
                value_end = value_start
                while value_end < len(sanitized) and sanitized[value_end] not in end_chars:
                    value_end += 1
                sanitized = sanitized[:value_start] + '[REDACTED]' + sanitized[value_end:]
                # Resume after the redaction marker so it is never rescanned.
                search_from = value_start + len('[REDACTED]')

        return sanitized

    def log_debug(self, message: str) -> None:
        """Log debug message if logger is available and not in errors-only mode."""
        if self.logger and not self.log_errors_only:
            self.logger.debug(message)

    def log_info(self, message: str) -> None:
        """Log info message if logger is available and not in errors-only mode."""
        if self.logger and not self.log_errors_only:
            self.logger.info(message)

    def log_warning(self, message: str) -> None:
        """Log warning message if logger is available and not in errors-only mode."""
        if self.logger and not self.log_errors_only:
            self.logger.warning(message)

    def log_error(self, message: str, exc_info: bool = False) -> None:
        """
        Log error message if logger is available (always logs errors regardless of log_errors_only).

        Args:
            message: Error message to log
            exc_info: Whether to include exception info
        """
        if self.logger:
            sanitized_message = self._sanitize_error(message)
            self.logger.error(sanitized_message, exc_info=exc_info)

    def log_critical(self, message: str, exc_info: bool = False) -> None:
        """
        Log critical message if logger is available (always logs critical regardless of log_errors_only).

        Args:
            message: Critical message to log
            exc_info: Whether to include exception info
        """
        if self.logger:
            sanitized_message = self._sanitize_error(message)
            self.logger.critical(sanitized_message, exc_info=exc_info)

    def is_enabled(self) -> bool:
        """Check if logging is enabled."""
        return self.logger is not None

    def is_debug_enabled(self) -> bool:
        """Check if debug logging is enabled (logger present, not errors-only)."""
        return self.logger is not None and not self.log_errors_only

    def set_logger(self, logger) -> None:
        """Set or update the logger instance."""
        self.logger = logger

    def set_errors_only(self, errors_only: bool) -> None:
        """Set or update the errors-only mode."""
        self.log_errors_only = errors_only

    def get_logger_info(self) -> dict:
        """
        Get information about the logger configuration.

        Returns:
            Dictionary with logger configuration details
        """
        return {
            'logger_available': self.logger is not None,
            'logger_name': getattr(self.logger, 'name', None) if self.logger else None,
            'log_errors_only': self.log_errors_only,
            'debug_enabled': self.is_debug_enabled()
        }
|
||||
275
data/manager_components/manager_stats_tracker.py
Normal file
275
data/manager_components/manager_stats_tracker.py
Normal file
@@ -0,0 +1,275 @@
|
||||
"""
|
||||
Manager Statistics Tracker for managing collector statistics and caching.
|
||||
|
||||
This module handles statistics collection, caching, and periodic updates
|
||||
to optimize performance by avoiding real-time calculations on every status request.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, Any, Optional, List
|
||||
from ..base_collector import BaseDataCollector, CollectorStatus
|
||||
|
||||
|
||||
class ManagerStatsTracker:
|
||||
"""Manages statistics tracking and caching for the collector manager."""
|
||||
|
||||
def __init__(self,
|
||||
cache_update_interval: float = 30.0,
|
||||
logger_manager=None,
|
||||
lifecycle_manager=None,
|
||||
health_monitor=None):
|
||||
"""
|
||||
Initialize the statistics tracker.
|
||||
|
||||
Args:
|
||||
cache_update_interval: Seconds between cache updates
|
||||
logger_manager: Logger manager instance for logging operations
|
||||
lifecycle_manager: Lifecycle manager for accessing collectors
|
||||
health_monitor: Health monitor for accessing health stats
|
||||
"""
|
||||
self.cache_update_interval = cache_update_interval
|
||||
self.logger_manager = logger_manager
|
||||
self.lifecycle_manager = lifecycle_manager
|
||||
self.health_monitor = health_monitor
|
||||
|
||||
# Statistics storage
|
||||
self._stats = {
|
||||
'total_collectors': 0,
|
||||
'running_collectors': 0,
|
||||
'failed_collectors': 0,
|
||||
'restarts_performed': 0,
|
||||
'last_global_check': None,
|
||||
'uptime_start': None
|
||||
}
|
||||
|
||||
# Cache management
|
||||
self._cached_status: Optional[Dict[str, Any]] = None
|
||||
self._cache_last_updated: Optional[datetime] = None
|
||||
self._cache_update_task: Optional[asyncio.Task] = None
|
||||
self._running = False
|
||||
|
||||
def set_running_state(self, running: bool) -> None:
|
||||
"""Set the running state of the tracker."""
|
||||
self._running = running
|
||||
if running:
|
||||
self._stats['uptime_start'] = datetime.now(timezone.utc)
|
||||
else:
|
||||
self._stats['uptime_start'] = None
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""Get current statistics."""
|
||||
return self._stats.copy()
|
||||
|
||||
def update_stat(self, key: str, value: Any) -> None:
|
||||
"""Update a specific statistic."""
|
||||
self._stats[key] = value
|
||||
|
||||
def increment_stat(self, key: str, amount: int = 1) -> None:
|
||||
"""Increment a numeric statistic."""
|
||||
if key in self._stats and isinstance(self._stats[key], (int, float)):
|
||||
self._stats[key] += amount
|
||||
else:
|
||||
self._stats[key] = amount
|
||||
|
||||
async def start_cache_updates(self) -> None:
|
||||
"""Start the background cache update task."""
|
||||
if self._cache_update_task and not self._cache_update_task.done():
|
||||
if self.logger_manager:
|
||||
self.logger_manager.log_warning("Cache updates are already running")
|
||||
return
|
||||
|
||||
if self.logger_manager:
|
||||
self.logger_manager.log_debug("Starting statistics cache updates")
|
||||
|
||||
self._cache_update_task = asyncio.create_task(self._cache_update_loop())
|
||||
|
||||
async def stop_cache_updates(self) -> None:
|
||||
"""Stop the background cache update task."""
|
||||
if self._cache_update_task and not self._cache_update_task.done():
|
||||
self._cache_update_task.cancel()
|
||||
try:
|
||||
await self._cache_update_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
if self.logger_manager:
|
||||
self.logger_manager.log_debug("Statistics cache updates stopped")
|
||||
|
||||
async def _cache_update_loop(self) -> None:
|
||||
"""Background loop for updating cached statistics."""
|
||||
while self._running:
|
||||
try:
|
||||
await asyncio.sleep(self.cache_update_interval)
|
||||
await self._update_cached_status()
|
||||
|
||||
except asyncio.CancelledError:
|
||||
if self.logger_manager:
|
||||
self.logger_manager.log_debug("Statistics cache update loop cancelled")
|
||||
break
|
||||
except Exception as e:
|
||||
if self.logger_manager:
|
||||
self.logger_manager.log_error(f"Error in statistics cache update: {e}", exc_info=True)
|
||||
await asyncio.sleep(self.cache_update_interval)
|
||||
|
||||
async def _update_cached_status(self) -> None:
|
||||
"""Update the cached status information."""
|
||||
try:
|
||||
# Update basic stats from lifecycle manager
|
||||
if self.lifecycle_manager:
|
||||
lifecycle_stats = self.lifecycle_manager.get_stats()
|
||||
self._stats.update(lifecycle_stats)
|
||||
|
||||
# Update health stats from health monitor
|
||||
if self.health_monitor:
|
||||
health_stats = self.health_monitor.get_health_stats()
|
||||
self._stats.update(health_stats)
|
||||
|
||||
# Calculate uptime
|
||||
uptime_seconds = None
|
||||
if self._stats['uptime_start']:
|
||||
uptime_seconds = (datetime.now(timezone.utc) - self._stats['uptime_start']).total_seconds()
|
||||
|
||||
# Build cached status
|
||||
self._cached_status = self._build_status_dict(uptime_seconds)
|
||||
self._cache_last_updated = datetime.now(timezone.utc)
|
||||
|
||||
if self.logger_manager:
|
||||
self.logger_manager.log_debug("Statistics cache updated")
|
||||
|
||||
except Exception as e:
|
||||
if self.logger_manager:
|
||||
self.logger_manager.log_error(f"Failed to update statistics cache: {e}", exc_info=True)
|
||||
|
||||
def _build_status_dict(self, uptime_seconds: Optional[float]) -> Dict[str, Any]:
|
||||
"""Build the complete status dictionary."""
|
||||
# Get individual collector statuses
|
||||
collector_statuses = {}
|
||||
if self.lifecycle_manager:
|
||||
collectors = self.lifecycle_manager.get_collectors()
|
||||
enabled_collectors = self.lifecycle_manager.get_enabled_collectors()
|
||||
|
||||
for name, collector in collectors.items():
|
||||
collector_statuses[name] = {
|
||||
'status': collector.status.value,
|
||||
'enabled': name in enabled_collectors,
|
||||
'health': collector.get_health_status()
|
||||
}
|
||||
|
||||
return {
|
||||
'uptime_seconds': uptime_seconds,
|
||||
'statistics': self._stats.copy(),
|
||||
'collectors': collector_statuses,
|
||||
'enabled_collectors': list(self.lifecycle_manager.get_enabled_collectors()) if self.lifecycle_manager else [],
|
||||
'total_collectors': len(collector_statuses),
|
||||
'cache_last_updated': self._cache_last_updated.isoformat() if self._cache_last_updated else None
|
||||
}
|
||||
|
||||
def get_status(self, force_refresh: bool = False) -> Dict[str, Any]:
    """
    Get manager status and statistics.

    Args:
        force_refresh: If True, bypass cache and calculate real-time

    Returns:
        Dictionary containing status information
    """
    # Serve the cached snapshot when present and still fresh enough;
    # "fresh" means no older than twice the update interval.
    if not force_refresh and self._cached_status and self._cache_last_updated:
        age = (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
        if age <= self.cache_update_interval * 2:
            return self._cached_status.copy()

    # Cache miss (or forced refresh): compute the snapshot in real time.
    started = self._stats['uptime_start']
    uptime_seconds = (
        (datetime.now(timezone.utc) - started).total_seconds()
        if started else None
    )
    return self._build_status_dict(uptime_seconds)
def get_collector_status(self, collector_name: str) -> Optional[Dict[str, Any]]:
    """
    Get status for a specific collector.

    Args:
        collector_name: Name of the collector

    Returns:
        Collector status dict or None if not found
    """
    # Without a lifecycle manager there are no collectors to report on.
    if not self.lifecycle_manager:
        return None

    target = self.lifecycle_manager.get_collector(collector_name)
    if not target:
        return None

    cfg = self.lifecycle_manager.get_collector_config(collector_name)
    return {
        'name': collector_name,
        'config': cfg.__dict__ if cfg else {},
        'status': target.get_status(),
        'health': target.get_health_status()
    }
def list_collectors(self) -> List[str]:
    """
    List all managed collector names.

    Returns:
        List of collector names
    """
    if not self.lifecycle_manager:
        return []
    # Iterating the collectors dict yields its keys (the names).
    return list(self.lifecycle_manager.get_collectors())
def get_running_collectors(self) -> List[str]:
    """
    Get names of currently running collectors.

    Returns:
        List of running collector names
    """
    if not self.lifecycle_manager:
        return []
    # A collector counts as running when its status enum is RUNNING.
    return [
        name
        for name, collector in self.lifecycle_manager.get_collectors().items()
        if collector.status == CollectorStatus.RUNNING
    ]
def get_failed_collectors(self) -> List[str]:
    """
    Get names of failed or unhealthy collectors.

    Returns:
        List of failed collector names
    """
    if not self.lifecycle_manager:
        return []
    # A collector counts as failed when its own health report says so.
    return [
        name
        for name, collector in self.lifecycle_manager.get_collectors().items()
        if not collector.get_health_status()['is_healthy']
    ]
def force_cache_refresh(self) -> None:
    """Force an immediate cache refresh.

    Schedules ``_update_cached_status`` on the running event loop.
    This is a no-op when the manager is stopped, and is safe to call
    from outside an event loop (the original ``asyncio.create_task``
    call would raise RuntimeError there). A strong reference to the
    scheduled task is retained so it cannot be garbage-collected
    before completing, per the asyncio create_task documentation.
    """
    if not self._running:
        return
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # Called from synchronous context with no loop: nothing to schedule on.
        if self.logger_manager:
            self.logger_manager.log_debug("force_cache_refresh skipped: no running event loop")
        return
    task = loop.create_task(self._update_cached_status())
    # Lazily create the holder so this stays backward compatible with
    # instances whose __init__ predates this attribute.
    tasks = getattr(self, '_refresh_tasks', None)
    if tasks is None:
        tasks = self._refresh_tasks = set()
    tasks.add(task)
    task.add_done_callback(tasks.discard)
def get_cache_info(self) -> Dict[str, Any]:
    """Get information about the cache state."""
    last = self._cache_last_updated
    if last:
        stamp = last.isoformat()
        age = (datetime.now(timezone.utc) - last).total_seconds()
    else:
        stamp = None
        age = None
    return {
        'cache_enabled': True,
        'cache_update_interval': self.cache_update_interval,
        'cache_last_updated': stamp,
        'cache_age_seconds': age
    }
|
||||
Reference in New Issue
Block a user