TCPDashboard/data/manager_components/manager_stats_tracker.py
Vasily.onl 2890ba2efa Implement Service Configuration Manager for data collection service
- Introduced `service_config.py` to manage configuration loading, validation, and schema management, enhancing modularity and security.
- Created a `ServiceConfig` class for handling configuration with robust error handling and default values.
- Refactored `DataCollectionService` to utilize the new `ServiceConfig`, streamlining configuration management and improving readability.
- Added a `CollectorFactory` to encapsulate collector creation logic, promoting separation of concerns.
- Updated `CollectorManager` and related components to align with the new architecture, ensuring better maintainability.
- Enhanced logging practices across the service for improved monitoring and debugging.

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity and performance.
2025-06-10 12:55:27 +08:00

275 lines
11 KiB
Python

"""
Manager Statistics Tracker for managing collector statistics and caching.
This module handles statistics collection, caching, and periodic updates
to optimize performance by avoiding real-time calculations on every status request.
"""
import asyncio
from datetime import datetime, timezone
from typing import Dict, Any, Optional, List
from ..base_collector import BaseDataCollector, CollectorStatus
class ManagerStatsTracker:
    """Track collector-manager statistics and serve them from a cache.

    The tracker keeps a small dictionary of counters, and can run a
    background asyncio task that periodically rebuilds a status snapshot so
    that ``get_status`` does not have to query every collector on each call.
    """

    def __init__(self,
                 cache_update_interval: float = 30.0,
                 logger_manager=None,
                 lifecycle_manager=None,
                 health_monitor=None):
        """
        Initialize the statistics tracker.

        Args:
            cache_update_interval: Seconds between cache updates
            logger_manager: Logger manager instance for logging operations
            lifecycle_manager: Lifecycle manager for accessing collectors
            health_monitor: Health monitor for accessing health stats
        """
        self.cache_update_interval = cache_update_interval
        self.logger_manager = logger_manager
        self.lifecycle_manager = lifecycle_manager
        self.health_monitor = health_monitor

        # Statistics storage
        self._stats = {
            'total_collectors': 0,
            'running_collectors': 0,
            'failed_collectors': 0,
            'restarts_performed': 0,
            'last_global_check': None,
            'uptime_start': None
        }

        # Cache management
        self._cached_status: Optional[Dict[str, Any]] = None
        self._cache_last_updated: Optional[datetime] = None
        self._cache_update_task: Optional[asyncio.Task] = None
        # Strong references to one-off refresh tasks; the event loop itself
        # only keeps weak references, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._refresh_tasks = set()
        self._running = False

    def set_running_state(self, running: bool) -> None:
        """
        Set the running state of the tracker.

        Turning the tracker on (re)starts the uptime clock; turning it off
        clears the uptime start so no uptime is reported.

        Args:
            running: New running state
        """
        self._running = running
        self._stats['uptime_start'] = (
            datetime.now(timezone.utc) if running else None
        )

    def get_stats(self) -> Dict[str, Any]:
        """
        Get current statistics.

        Returns:
            A shallow copy of the statistics dictionary
        """
        return self._stats.copy()

    def update_stat(self, key: str, value: Any) -> None:
        """
        Update a specific statistic.

        Args:
            key: Statistic name (created if it does not exist yet)
            value: New value to store
        """
        self._stats[key] = value

    def increment_stat(self, key: str, amount: int = 1) -> None:
        """
        Increment a numeric statistic.

        If the key is missing or currently holds a non-numeric value
        (for example ``None``), the statistic is set to ``amount``.

        Args:
            key: Statistic name
            amount: Value to add
        """
        current = self._stats.get(key)
        if isinstance(current, (int, float)):
            self._stats[key] = current + amount
        else:
            self._stats[key] = amount

    async def start_cache_updates(self) -> None:
        """
        Start the background cache update task.

        Does nothing (other than logging a warning) when a previous update
        task is still active.
        """
        if self._cache_update_task and not self._cache_update_task.done():
            if self.logger_manager:
                self.logger_manager.log_warning("Cache updates are already running")
            return

        if self.logger_manager:
            self.logger_manager.log_debug("Starting statistics cache updates")
        self._cache_update_task = asyncio.create_task(self._cache_update_loop())

    async def stop_cache_updates(self) -> None:
        """Stop the background cache update task and wait for it to finish."""
        if self._cache_update_task and not self._cache_update_task.done():
            self._cache_update_task.cancel()
            try:
                await self._cache_update_task
            except asyncio.CancelledError:
                pass

        if self.logger_manager:
            self.logger_manager.log_debug("Statistics cache updates stopped")

    async def _cache_update_loop(self) -> None:
        """
        Background loop for updating cached statistics.

        Runs while the tracker is in the running state: sleeps for one
        update interval, then refreshes the cache. Cancellation ends the
        loop quietly.
        """
        while self._running:
            try:
                await asyncio.sleep(self.cache_update_interval)
                await self._update_cached_status()
            except asyncio.CancelledError:
                if self.logger_manager:
                    self.logger_manager.log_debug("Statistics cache update loop cancelled")
                break
            except Exception as e:
                if self.logger_manager:
                    self.logger_manager.log_error(f"Error in statistics cache update: {e}", exc_info=True)
                # Extra pause after a failure; combined with the sleep at the
                # top of the loop, the next attempt is about two intervals away.
                await asyncio.sleep(self.cache_update_interval)

    def _uptime_seconds(self) -> Optional[float]:
        """Return seconds elapsed since ``uptime_start``, or None if unset."""
        started_at = self._stats.get('uptime_start')
        if not started_at:
            return None
        return (datetime.now(timezone.utc) - started_at).total_seconds()

    async def _update_cached_status(self) -> None:
        """
        Update the cached status information.

        Pulls fresh numbers from the lifecycle manager and health monitor
        (when configured), rebuilds the status snapshot and stores it with
        its timestamp. Any failure is logged and the previous cache is kept.
        """
        try:
            # Update basic stats from lifecycle manager
            if self.lifecycle_manager:
                self._stats.update(self.lifecycle_manager.get_stats())

            # Update health stats from health monitor
            if self.health_monitor:
                self._stats.update(self.health_monitor.get_health_stats())

            refreshed_at = datetime.now(timezone.utc)
            snapshot = self._build_status_dict(self._uptime_seconds())
            # Stamp the snapshot with the time of THIS refresh. The builder
            # reads self._cache_last_updated, which still holds the previous
            # refresh time (or None) at this point.
            snapshot['cache_last_updated'] = refreshed_at.isoformat()

            # Publish snapshot and timestamp together only after a successful build.
            self._cached_status = snapshot
            self._cache_last_updated = refreshed_at

            if self.logger_manager:
                self.logger_manager.log_debug("Statistics cache updated")
        except Exception as e:
            if self.logger_manager:
                self.logger_manager.log_error(f"Failed to update statistics cache: {e}", exc_info=True)

    def _build_status_dict(self, uptime_seconds: Optional[float]) -> Dict[str, Any]:
        """
        Build the complete status dictionary.

        Args:
            uptime_seconds: Uptime to report, or None when not running

        Returns:
            Dictionary with uptime, a copy of the statistics, per-collector
            status/enabled/health entries, the enabled collector names, the
            collector count and the last cache update time (ISO 8601 or None)
        """
        collector_statuses = {}
        enabled_collectors = []

        if self.lifecycle_manager:
            # Query the enabled set once and reuse it for both the
            # per-collector flag and the top-level list.
            enabled_collectors = self.lifecycle_manager.get_enabled_collectors()
            for name, collector in self.lifecycle_manager.get_collectors().items():
                collector_statuses[name] = {
                    'status': collector.status.value,
                    'enabled': name in enabled_collectors,
                    'health': collector.get_health_status()
                }

        return {
            'uptime_seconds': uptime_seconds,
            'statistics': self._stats.copy(),
            'collectors': collector_statuses,
            'enabled_collectors': list(enabled_collectors),
            'total_collectors': len(collector_statuses),
            'cache_last_updated': self._cache_last_updated.isoformat() if self._cache_last_updated else None
        }

    def get_status(self, force_refresh: bool = False) -> Dict[str, Any]:
        """
        Get manager status and statistics.

        Args:
            force_refresh: If True, bypass cache and calculate real-time

        Returns:
            Dictionary containing status information. When served from the
            cache this is a shallow copy of the cached snapshot.
        """
        # Return cached status if available and not forcing refresh
        if not force_refresh and self._cached_status and self._cache_last_updated:
            # Check if cache is recent enough (within 2x the update interval)
            cache_age = (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
            if cache_age <= (self.cache_update_interval * 2):
                return self._cached_status.copy()

        # Calculate real-time status (does not modify the cache)
        return self._build_status_dict(self._uptime_seconds())

    def get_collector_status(self, collector_name: str) -> Optional[Dict[str, Any]]:
        """
        Get status for a specific collector.

        Args:
            collector_name: Name of the collector

        Returns:
            Collector status dict or None if not found (or when no
            lifecycle manager is configured)
        """
        if not self.lifecycle_manager:
            return None

        collector = self.lifecycle_manager.get_collector(collector_name)
        if not collector:
            return None

        config = self.lifecycle_manager.get_collector_config(collector_name)
        return {
            'name': collector_name,
            'config': config.__dict__ if config else {},
            'status': collector.get_status(),
            'health': collector.get_health_status()
        }

    def list_collectors(self) -> List[str]:
        """
        List all managed collector names.

        Returns:
            List of collector names (empty without a lifecycle manager)
        """
        if self.lifecycle_manager:
            return list(self.lifecycle_manager.get_collectors().keys())
        return []

    def get_running_collectors(self) -> List[str]:
        """
        Get names of currently running collectors.

        Returns:
            List of running collector names
        """
        if not self.lifecycle_manager:
            return []
        return [
            name
            for name, collector in self.lifecycle_manager.get_collectors().items()
            if collector.status == CollectorStatus.RUNNING
        ]

    def get_failed_collectors(self) -> List[str]:
        """
        Get names of failed or unhealthy collectors.

        A collector is listed when its health status reports
        ``is_healthy`` as false.

        Returns:
            List of failed collector names
        """
        if not self.lifecycle_manager:
            return []
        return [
            name
            for name, collector in self.lifecycle_manager.get_collectors().items()
            if not collector.get_health_status()['is_healthy']
        ]

    def force_cache_refresh(self) -> None:
        """
        Force an immediate cache refresh.

        Schedules a one-off refresh on the running event loop; it is a
        no-op while the tracker is not in the running state.

        Raises:
            RuntimeError: If called while running but without a running
                event loop.
        """
        if not self._running:
            return

        # Look the loop up first so that a missing loop raises before the
        # coroutine object is created (avoids a "never awaited" warning).
        loop = asyncio.get_running_loop()
        task = loop.create_task(self._update_cached_status())
        # Hold a reference until the task completes, then drop it.
        self._refresh_tasks.add(task)
        task.add_done_callback(self._refresh_tasks.discard)

    def get_cache_info(self) -> Dict[str, Any]:
        """
        Get information about the cache state.

        Returns:
            Dictionary with the update interval, the last update time
            (ISO 8601 or None) and the cache age in seconds (or None)
        """
        last_updated = self._cache_last_updated
        if last_updated:
            last_updated_iso = last_updated.isoformat()
            age_seconds = (datetime.now(timezone.utc) - last_updated).total_seconds()
        else:
            last_updated_iso = None
            age_seconds = None

        return {
            'cache_enabled': True,
            'cache_update_interval': self.cache_update_interval,
            'cache_last_updated': last_updated_iso,
            'cache_age_seconds': age_seconds
        }