275 lines
11 KiB
Python
275 lines
11 KiB
Python
|
|
"""
|
||
|
|
Manager Statistics Tracker for managing collector statistics and caching.
|
||
|
|
|
||
|
|
This module handles statistics collection, caching, and periodic updates
|
||
|
|
to optimize performance by avoiding real-time calculations on every status request.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import asyncio
|
||
|
|
from datetime import datetime, timezone
|
||
|
|
from typing import Dict, Any, Optional, List
|
||
|
|
from ..base_collector import BaseDataCollector, CollectorStatus
|
||
|
|
|
||
|
|
|
||
|
|
class ManagerStatsTracker:
    """Manages statistics tracking and caching for the collector manager.

    The tracker keeps a mutable statistics dictionary and a cached status
    snapshot. The snapshot is rebuilt by a background asyncio task every
    ``cache_update_interval`` seconds so that ``get_status`` can usually
    answer from the cache instead of querying every collector.
    """

    def __init__(self,
                 cache_update_interval: float = 30.0,
                 logger_manager=None,
                 lifecycle_manager=None,
                 health_monitor=None):
        """
        Initialize the statistics tracker.

        Args:
            cache_update_interval: Seconds between cache updates
            logger_manager: Logger manager instance for logging operations
            lifecycle_manager: Lifecycle manager for accessing collectors
            health_monitor: Health monitor for accessing health stats
        """
        self.cache_update_interval = cache_update_interval
        self.logger_manager = logger_manager
        self.lifecycle_manager = lifecycle_manager
        self.health_monitor = health_monitor

        # Statistics storage
        self._stats: Dict[str, Any] = {
            'total_collectors': 0,
            'running_collectors': 0,
            'failed_collectors': 0,
            'restarts_performed': 0,
            'last_global_check': None,
            'uptime_start': None,
        }

        # Cache management
        self._cached_status: Optional[Dict[str, Any]] = None
        self._cache_last_updated: Optional[datetime] = None
        self._cache_update_task: Optional[asyncio.Task] = None
        # Strong references to one-shot refresh tasks started by
        # force_cache_refresh(); the event loop itself only keeps weak
        # references, so an unreferenced task may be collected before it runs.
        self._refresh_tasks: set = set()
        self._running = False

    def set_running_state(self, running: bool) -> None:
        """Set the running state of the tracker.

        Switching to running stamps ``uptime_start`` with the current UTC
        time (every call with ``True`` restarts the uptime clock); switching
        to not-running clears it.

        Args:
            running: New running state
        """
        self._running = running
        self._stats['uptime_start'] = datetime.now(timezone.utc) if running else None

    def get_stats(self) -> Dict[str, Any]:
        """Get current statistics as a shallow copy of the internal dict."""
        return self._stats.copy()

    def update_stat(self, key: str, value: Any) -> None:
        """Update a specific statistic.

        Args:
            key: Statistic name (created if it does not exist yet)
            value: New value
        """
        self._stats[key] = value

    def increment_stat(self, key: str, amount: int = 1) -> None:
        """Increment a numeric statistic.

        If the key is missing or its current value is not an int/float
        (e.g. ``None``), the statistic is set to ``amount`` instead.

        Args:
            key: Statistic name
            amount: Value to add
        """
        current = self._stats.get(key)
        if isinstance(current, (int, float)):
            self._stats[key] = current + amount
        else:
            self._stats[key] = amount

    async def start_cache_updates(self) -> None:
        """Start the background cache update task.

        Does nothing (apart from a warning) when a previous update task is
        still active. The loop only iterates while the tracker is in the
        running state, so ``set_running_state(True)`` must have been called
        for the task to do any work.
        """
        if self._cache_update_task and not self._cache_update_task.done():
            if self.logger_manager:
                self.logger_manager.log_warning("Cache updates are already running")
            return

        if self.logger_manager:
            self.logger_manager.log_debug("Starting statistics cache updates")

        self._cache_update_task = asyncio.create_task(self._cache_update_loop())

    async def stop_cache_updates(self) -> None:
        """Stop the background cache update task and wait for it to finish."""
        if self._cache_update_task and not self._cache_update_task.done():
            self._cache_update_task.cancel()
            try:
                await self._cache_update_task
            except asyncio.CancelledError:
                # Expected: we requested the cancellation ourselves.
                pass

        if self.logger_manager:
            self.logger_manager.log_debug("Statistics cache updates stopped")

    async def _cache_update_loop(self) -> None:
        """Background loop for updating cached statistics.

        Sleeps for ``cache_update_interval`` seconds and then refreshes the
        cache, for as long as the tracker is in the running state.
        """
        while self._running:
            try:
                await asyncio.sleep(self.cache_update_interval)
                await self._update_cached_status()

            except asyncio.CancelledError:
                if self.logger_manager:
                    self.logger_manager.log_debug("Statistics cache update loop cancelled")
                break
            except Exception as e:
                if self.logger_manager:
                    self.logger_manager.log_error(f"Error in statistics cache update: {e}", exc_info=True)
                # Back off before retrying so a persistent error cannot spin.
                await asyncio.sleep(self.cache_update_interval)

    def _uptime_seconds(self, now: Optional[datetime] = None) -> Optional[float]:
        """Return seconds elapsed since ``uptime_start``, or None if it is unset."""
        started = self._stats['uptime_start']
        if not started:
            return None
        if now is None:
            now = datetime.now(timezone.utc)
        return (now - started).total_seconds()

    async def _update_cached_status(self) -> None:
        """Update the cached status information.

        Merges the lifecycle manager's and health monitor's stats into the
        internal statistics, then rebuilds the cached snapshot. Any exception
        is logged and swallowed; the previous cache is kept in that case.
        """
        try:
            # Update basic stats from lifecycle manager
            if self.lifecycle_manager:
                self._stats.update(self.lifecycle_manager.get_stats())

            # Update health stats from health monitor
            if self.health_monitor:
                self._stats.update(self.health_monitor.get_health_stats())

            refreshed_at = datetime.now(timezone.utc)
            status = self._build_status_dict(self._uptime_seconds(refreshed_at))
            # The snapshot must carry the timestamp of *this* refresh; the
            # builder only knows the previous one (None on the first run).
            status['cache_last_updated'] = refreshed_at.isoformat()

            # Publish snapshot and timestamp together, only after the build
            # succeeded, so a failed refresh never marks stale data as fresh.
            self._cached_status = status
            self._cache_last_updated = refreshed_at

            if self.logger_manager:
                self.logger_manager.log_debug("Statistics cache updated")

        except Exception as e:
            if self.logger_manager:
                self.logger_manager.log_error(f"Failed to update statistics cache: {e}", exc_info=True)

    def _build_status_dict(self, uptime_seconds: Optional[float]) -> Dict[str, Any]:
        """Build the complete status dictionary.

        Args:
            uptime_seconds: Uptime value to report (may be None)

        Returns:
            Dictionary with uptime, a copy of the statistics, per-collector
            status/enabled/health entries, the enabled collector names, the
            collector count and the ISO timestamp of the last cache update.
        """
        collector_statuses: Dict[str, Dict[str, Any]] = {}
        enabled_names: List[str] = []

        if self.lifecycle_manager:
            collectors = self.lifecycle_manager.get_collectors()
            enabled_collectors = self.lifecycle_manager.get_enabled_collectors()
            enabled_names = list(enabled_collectors)

            for name, collector in collectors.items():
                collector_statuses[name] = {
                    'status': collector.status.value,
                    'enabled': name in enabled_collectors,
                    'health': collector.get_health_status(),
                }

        last_updated = self._cache_last_updated
        return {
            'uptime_seconds': uptime_seconds,
            'statistics': self._stats.copy(),
            'collectors': collector_statuses,
            'enabled_collectors': enabled_names,
            'total_collectors': len(collector_statuses),
            'cache_last_updated': last_updated.isoformat() if last_updated else None,
        }

    def get_status(self, force_refresh: bool = False) -> Dict[str, Any]:
        """
        Get manager status and statistics.

        Args:
            force_refresh: If True, bypass cache and calculate real-time

        Returns:
            Dictionary containing status information. When served from the
            cache this is a shallow copy of the cached snapshot.
        """
        # Return cached status if available and not forcing refresh
        if not force_refresh and self._cached_status and self._cache_last_updated:
            # Check if cache is recent enough (within 2x the update interval)
            cache_age = (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
            if cache_age <= (self.cache_update_interval * 2):
                return self._cached_status.copy()

        # Calculate real-time status
        return self._build_status_dict(self._uptime_seconds())

    def get_collector_status(self, collector_name: str) -> Optional[Dict[str, Any]]:
        """
        Get status for a specific collector.

        Args:
            collector_name: Name of the collector

        Returns:
            Collector status dict or None if not found
        """
        if not self.lifecycle_manager:
            return None

        collector = self.lifecycle_manager.get_collector(collector_name)
        if not collector:
            return None

        config = self.lifecycle_manager.get_collector_config(collector_name)

        return {
            'name': collector_name,
            # Copy the attribute dict so callers cannot mutate the live
            # config object through the returned status.
            'config': dict(config.__dict__) if config else {},
            'status': collector.get_status(),
            'health': collector.get_health_status(),
        }

    def list_collectors(self) -> List[str]:
        """
        List all managed collector names.

        Returns:
            List of collector names
        """
        if self.lifecycle_manager:
            return list(self.lifecycle_manager.get_collectors().keys())
        return []

    def get_running_collectors(self) -> List[str]:
        """
        Get names of currently running collectors.

        Returns:
            List of running collector names
        """
        if not self.lifecycle_manager:
            return []
        collectors = self.lifecycle_manager.get_collectors()
        return [
            name for name, collector in collectors.items()
            if collector.status == CollectorStatus.RUNNING
        ]

    def get_failed_collectors(self) -> List[str]:
        """
        Get names of failed or unhealthy collectors.

        A collector is reported when the ``'is_healthy'`` entry of its
        health status is falsy.

        Returns:
            List of failed collector names
        """
        if not self.lifecycle_manager:
            return []
        collectors = self.lifecycle_manager.get_collectors()
        return [
            name for name, collector in collectors.items()
            if not collector.get_health_status()['is_healthy']
        ]

    def force_cache_refresh(self) -> None:
        """Force an immediate cache refresh.

        Schedules a one-shot refresh task on the running event loop; does
        nothing when the tracker is not in the running state.

        Raises:
            RuntimeError: If called while running but without a running
                event loop (raised by ``asyncio.create_task``).
        """
        if not self._running:
            return

        refresh_task = asyncio.create_task(self._update_cached_status())
        # Keep the task alive until it completes, then drop the reference.
        self._refresh_tasks.add(refresh_task)
        refresh_task.add_done_callback(self._refresh_tasks.discard)

    def get_cache_info(self) -> Dict[str, Any]:
        """Get information about the cache state.

        Returns:
            Dictionary with the update interval, the ISO timestamp of the
            last update and the cache age in seconds (both None before the
            first update).
        """
        last_updated = self._cache_last_updated
        if last_updated:
            last_updated_iso = last_updated.isoformat()
            age_seconds = (datetime.now(timezone.utc) - last_updated).total_seconds()
        else:
            last_updated_iso = None
            age_seconds = None

        return {
            'cache_enabled': True,
            'cache_update_interval': self.cache_update_interval,
            'cache_last_updated': last_updated_iso,
            'cache_age_seconds': age_seconds,
        }
|