- Introduced `service_config.py` to manage configuration loading, validation, and schema management, enhancing modularity and security.
- Created a `ServiceConfig` class for handling configuration with robust error handling and default values.
- Refactored `DataCollectionService` to utilize the new `ServiceConfig`, streamlining configuration management and improving readability.
- Added a `CollectorFactory` to encapsulate collector creation logic, promoting separation of concerns.
- Updated `CollectorManager` and related components to align with the new architecture, ensuring better maintainability.
- Enhanced logging practices across the service for improved monitoring and debugging.

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity and performance (a rough sketch of the configuration/factory split follows below).
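The refactored modules themselves (`service_config.py`, `DataCollectionService`, `CollectorFactory`) are not included in this file, so the snippet below is only a minimal sketch of what the described split could look like. All class shapes, method names, and the registry-based wiring are assumptions made for illustration, not the project's actual API.

```python
# Hypothetical sketch of the ServiceConfig / CollectorFactory split described above.
# Names and signatures are illustrative assumptions, not the project's real API.
from dataclasses import dataclass, field
from typing import Any, Dict


@dataclass
class ServiceConfig:
    """Validated service configuration with safe defaults (illustrative)."""
    raw: Dict[str, Any] = field(default_factory=dict)

    def collector_names(self):
        # Default to an empty collector map if the key is missing.
        return list(self.raw.get("collectors", {}).keys())

    def collector_settings(self, name: str) -> Dict[str, Any]:
        settings = self.raw.get("collectors", {}).get(name)
        if settings is None:
            raise KeyError(f"No configuration found for collector '{name}'")
        return settings


class CollectorFactory:
    """Builds collector instances from configuration (illustrative)."""

    def __init__(self, registry: Dict[str, type]):
        # Maps a collector name to the class that implements it.
        self._registry = registry

    def create(self, name: str, config: ServiceConfig):
        collector_cls = self._registry[name]
        return collector_cls(**config.collector_settings(name))
```

The intent mirrors the summary: configuration parsing and validation sit behind `ServiceConfig`, collector construction sits behind `CollectorFactory`, and the service itself only orchestrates.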
275 lines · 11 KiB · Python
"""
|
|
Manager Statistics Tracker for managing collector statistics and caching.
|
|
|
|
This module handles statistics collection, caching, and periodic updates
|
|
to optimize performance by avoiding real-time calculations on every status request.
|
|
"""
|
|
|
|
import asyncio
|
|
from datetime import datetime, timezone
|
|
from typing import Dict, Any, Optional, List
|
|
from ..base_collector import BaseDataCollector, CollectorStatus
|
|
|
|
|
|
class ManagerStatsTracker:
|
|
"""Manages statistics tracking and caching for the collector manager."""
|
|
|
|
def __init__(self,
|
|
cache_update_interval: float = 30.0,
|
|
logger_manager=None,
|
|
lifecycle_manager=None,
|
|
health_monitor=None):
|
|
"""
|
|
Initialize the statistics tracker.
|
|
|
|
Args:
|
|
cache_update_interval: Seconds between cache updates
|
|
logger_manager: Logger manager instance for logging operations
|
|
lifecycle_manager: Lifecycle manager for accessing collectors
|
|
health_monitor: Health monitor for accessing health stats
|
|
"""
|
|
self.cache_update_interval = cache_update_interval
|
|
self.logger_manager = logger_manager
|
|
self.lifecycle_manager = lifecycle_manager
|
|
self.health_monitor = health_monitor
|
|
|
|
# Statistics storage
|
|
self._stats = {
|
|
'total_collectors': 0,
|
|
'running_collectors': 0,
|
|
'failed_collectors': 0,
|
|
'restarts_performed': 0,
|
|
'last_global_check': None,
|
|
'uptime_start': None
|
|
}
|
|
|
|
# Cache management
|
|
self._cached_status: Optional[Dict[str, Any]] = None
|
|
self._cache_last_updated: Optional[datetime] = None
|
|
self._cache_update_task: Optional[asyncio.Task] = None
|
|
self._running = False
|
|
|
|
def set_running_state(self, running: bool) -> None:
|
|
"""Set the running state of the tracker."""
|
|
self._running = running
|
|
if running:
|
|
self._stats['uptime_start'] = datetime.now(timezone.utc)
|
|
else:
|
|
self._stats['uptime_start'] = None
|
|
|
|
def get_stats(self) -> Dict[str, Any]:
|
|
"""Get current statistics."""
|
|
return self._stats.copy()
|
|
|
|
def update_stat(self, key: str, value: Any) -> None:
|
|
"""Update a specific statistic."""
|
|
self._stats[key] = value
|
|
|
|
def increment_stat(self, key: str, amount: int = 1) -> None:
|
|
"""Increment a numeric statistic."""
|
|
if key in self._stats and isinstance(self._stats[key], (int, float)):
|
|
self._stats[key] += amount
|
|
else:
|
|
self._stats[key] = amount
|
|
|
|
async def start_cache_updates(self) -> None:
|
|
"""Start the background cache update task."""
|
|
if self._cache_update_task and not self._cache_update_task.done():
|
|
if self.logger_manager:
|
|
self.logger_manager.log_warning("Cache updates are already running")
|
|
return
|
|
|
|
if self.logger_manager:
|
|
self.logger_manager.log_debug("Starting statistics cache updates")
|
|
|
|
self._cache_update_task = asyncio.create_task(self._cache_update_loop())
|
|
|
|
async def stop_cache_updates(self) -> None:
|
|
"""Stop the background cache update task."""
|
|
if self._cache_update_task and not self._cache_update_task.done():
|
|
self._cache_update_task.cancel()
|
|
try:
|
|
await self._cache_update_task
|
|
except asyncio.CancelledError:
|
|
pass
|
|
|
|
if self.logger_manager:
|
|
self.logger_manager.log_debug("Statistics cache updates stopped")
|
|
|
|
async def _cache_update_loop(self) -> None:
|
|
"""Background loop for updating cached statistics."""
|
|
while self._running:
|
|
try:
|
|
await asyncio.sleep(self.cache_update_interval)
|
|
await self._update_cached_status()
|
|
|
|
except asyncio.CancelledError:
|
|
if self.logger_manager:
|
|
self.logger_manager.log_debug("Statistics cache update loop cancelled")
|
|
break
|
|
except Exception as e:
|
|
if self.logger_manager:
|
|
self.logger_manager.log_error(f"Error in statistics cache update: {e}", exc_info=True)
|
|
await asyncio.sleep(self.cache_update_interval)
|
|
|
|
async def _update_cached_status(self) -> None:
|
|
"""Update the cached status information."""
|
|
try:
|
|
# Update basic stats from lifecycle manager
|
|
if self.lifecycle_manager:
|
|
lifecycle_stats = self.lifecycle_manager.get_stats()
|
|
self._stats.update(lifecycle_stats)
|
|
|
|
# Update health stats from health monitor
|
|
if self.health_monitor:
|
|
health_stats = self.health_monitor.get_health_stats()
|
|
self._stats.update(health_stats)
|
|
|
|
# Calculate uptime
|
|
uptime_seconds = None
|
|
if self._stats['uptime_start']:
|
|
uptime_seconds = (datetime.now(timezone.utc) - self._stats['uptime_start']).total_seconds()
|
|
|
|
# Build cached status
|
|
self._cached_status = self._build_status_dict(uptime_seconds)
|
|
self._cache_last_updated = datetime.now(timezone.utc)
|
|
|
|
if self.logger_manager:
|
|
self.logger_manager.log_debug("Statistics cache updated")
|
|
|
|
except Exception as e:
|
|
if self.logger_manager:
|
|
self.logger_manager.log_error(f"Failed to update statistics cache: {e}", exc_info=True)
|
|
|
|
def _build_status_dict(self, uptime_seconds: Optional[float]) -> Dict[str, Any]:
|
|
"""Build the complete status dictionary."""
|
|
# Get individual collector statuses
|
|
collector_statuses = {}
|
|
if self.lifecycle_manager:
|
|
collectors = self.lifecycle_manager.get_collectors()
|
|
enabled_collectors = self.lifecycle_manager.get_enabled_collectors()
|
|
|
|
for name, collector in collectors.items():
|
|
collector_statuses[name] = {
|
|
'status': collector.status.value,
|
|
'enabled': name in enabled_collectors,
|
|
'health': collector.get_health_status()
|
|
}
|
|
|
|
return {
|
|
'uptime_seconds': uptime_seconds,
|
|
'statistics': self._stats.copy(),
|
|
'collectors': collector_statuses,
|
|
'enabled_collectors': list(self.lifecycle_manager.get_enabled_collectors()) if self.lifecycle_manager else [],
|
|
'total_collectors': len(collector_statuses),
|
|
'cache_last_updated': self._cache_last_updated.isoformat() if self._cache_last_updated else None
|
|
}
|
|
|
|
def get_status(self, force_refresh: bool = False) -> Dict[str, Any]:
|
|
"""
|
|
Get manager status and statistics.
|
|
|
|
Args:
|
|
force_refresh: If True, bypass cache and calculate real-time
|
|
|
|
Returns:
|
|
Dictionary containing status information
|
|
"""
|
|
# Return cached status if available and not forcing refresh
|
|
if not force_refresh and self._cached_status and self._cache_last_updated:
|
|
# Check if cache is recent enough (within 2x the update interval)
|
|
cache_age = (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
|
|
if cache_age <= (self.cache_update_interval * 2):
|
|
return self._cached_status.copy()
|
|
|
|
# Calculate real-time status
|
|
uptime_seconds = None
|
|
if self._stats['uptime_start']:
|
|
uptime_seconds = (datetime.now(timezone.utc) - self._stats['uptime_start']).total_seconds()
|
|
|
|
return self._build_status_dict(uptime_seconds)
|
|
|
|
def get_collector_status(self, collector_name: str) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Get status for a specific collector.
|
|
|
|
Args:
|
|
collector_name: Name of the collector
|
|
|
|
Returns:
|
|
Collector status dict or None if not found
|
|
"""
|
|
if not self.lifecycle_manager:
|
|
return None
|
|
|
|
collector = self.lifecycle_manager.get_collector(collector_name)
|
|
if not collector:
|
|
return None
|
|
|
|
config = self.lifecycle_manager.get_collector_config(collector_name)
|
|
|
|
return {
|
|
'name': collector_name,
|
|
'config': config.__dict__ if config else {},
|
|
'status': collector.get_status(),
|
|
'health': collector.get_health_status()
|
|
}
|
|
|
|
def list_collectors(self) -> List[str]:
|
|
"""
|
|
List all managed collector names.
|
|
|
|
Returns:
|
|
List of collector names
|
|
"""
|
|
if self.lifecycle_manager:
|
|
return list(self.lifecycle_manager.get_collectors().keys())
|
|
return []
|
|
|
|
def get_running_collectors(self) -> List[str]:
|
|
"""
|
|
Get names of currently running collectors.
|
|
|
|
Returns:
|
|
List of running collector names
|
|
"""
|
|
running = []
|
|
if self.lifecycle_manager:
|
|
collectors = self.lifecycle_manager.get_collectors()
|
|
for name, collector in collectors.items():
|
|
if collector.status == CollectorStatus.RUNNING:
|
|
running.append(name)
|
|
return running
|
|
|
|
def get_failed_collectors(self) -> List[str]:
|
|
"""
|
|
Get names of failed or unhealthy collectors.
|
|
|
|
Returns:
|
|
List of failed collector names
|
|
"""
|
|
failed = []
|
|
if self.lifecycle_manager:
|
|
collectors = self.lifecycle_manager.get_collectors()
|
|
for name, collector in collectors.items():
|
|
health_status = collector.get_health_status()
|
|
if not health_status['is_healthy']:
|
|
failed.append(name)
|
|
return failed
|
|
|
|
def force_cache_refresh(self) -> None:
|
|
"""Force an immediate cache refresh."""
|
|
if self._running:
|
|
asyncio.create_task(self._update_cached_status())
|
|
|
|
def get_cache_info(self) -> Dict[str, Any]:
|
|
"""Get information about the cache state."""
|
|
return {
|
|
'cache_enabled': True,
|
|
'cache_update_interval': self.cache_update_interval,
|
|
'cache_last_updated': self._cache_last_updated.isoformat() if self._cache_last_updated else None,
|
|
'cache_age_seconds': (
|
|
(datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
|
|
if self._cache_last_updated else None
|
|
)
|
|
} |
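For reference, a minimal usage sketch of the tracker above. It assumes `ManagerStatsTracker` has been imported from its package (the import path depends on the project layout) and leaves the collaborators (`logger_manager`, `lifecycle_manager`, `health_monitor`) as `None`, which the constructor allows; the status output therefore contains no per-collector entries.

```python
import asyncio

# Assumes: from <package>.manager_stats_tracker import ManagerStatsTracker
# (exact module path depends on the project layout)

async def main():
    tracker = ManagerStatsTracker(cache_update_interval=5.0)

    tracker.set_running_state(True)          # starts the uptime clock and the loop condition
    await tracker.start_cache_updates()      # spawn the background cache refresh task

    tracker.increment_stat('restarts_performed')
    await asyncio.sleep(6)                   # allow at least one cache update to run

    status = tracker.get_status()            # served from the cache while it is fresh
    print(status['uptime_seconds'], status['statistics']['restarts_performed'])
    print(tracker.get_cache_info())

    tracker.set_running_state(False)         # clear the running flag so the loop can exit
    await tracker.stop_cache_updates()       # cancel and await the background task

asyncio.run(main())
```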