TCPDashboard/data/manager_components/manager_stats_tracker.py
Vasily.onl f6cb1485b1 Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00

310 lines
12 KiB
Python

"""
Manager Statistics Tracker for managing collector statistics and caching.
This module handles statistics collection, caching, and periodic updates
to optimize performance by avoiding real-time calculations on every status request.
"""
import asyncio
from datetime import datetime, timezone
from typing import Dict, Any, Optional, List
from ..collector.base_collector import BaseDataCollector, CollectorStatus
class ManagerStatsTracker:
    """Manages statistics tracking and caching for the collector manager.

    Periodically snapshots statistics from the lifecycle manager and health
    monitor into a cached status dict so frequent status requests do not pay
    for real-time recalculation. All collaborators are optional; every access
    is guarded, so the tracker degrades to empty/zeroed results without them.
    """

    def __init__(self,
                 cache_update_interval: float = 30.0,
                 logger_manager=None,
                 lifecycle_manager=None,
                 health_monitor=None):
        """
        Initialize the statistics tracker.

        Args:
            cache_update_interval: Seconds between cache updates
            logger_manager: Logger manager instance for logging operations
            lifecycle_manager: Lifecycle manager for accessing collectors
            health_monitor: Health monitor for accessing health stats
        """
        self.cache_update_interval = cache_update_interval
        self.logger_manager = logger_manager
        self.lifecycle_manager = lifecycle_manager
        self.health_monitor = health_monitor

        # Statistics storage. Keys may be overwritten wholesale by
        # lifecycle/health stats in _update_cached_status.
        self._stats: Dict[str, Any] = {
            'total_collectors': 0,
            'running_collectors': 0,
            'failed_collectors': 0,
            'restarts_performed': 0,
            'last_global_check': None,
            'uptime_start': None
        }

        # Cache management
        self._cached_status: Optional[Dict[str, Any]] = None
        self._cache_last_updated: Optional[datetime] = None
        self._cache_update_task: Optional[asyncio.Task] = None
        self._running = False

        # Strong references to fire-and-forget refresh tasks created by
        # force_cache_refresh(). The event loop keeps only a weak reference
        # to tasks, so without this set a pending refresh could be
        # garbage-collected before it runs (see asyncio.create_task docs).
        self._pending_refresh_tasks: set = set()

        # Performance tracking for cache optimization
        self._cache_hit_count = 0
        self._cache_miss_count = 0
        self._last_performance_log = datetime.now(timezone.utc)

    def set_running_state(self, running: bool) -> None:
        """Set the running state of the tracker.

        Starting stamps ``uptime_start``; stopping clears it, which also
        zeroes out reported uptime.
        """
        self._running = running
        if running:
            self._stats['uptime_start'] = datetime.now(timezone.utc)
        else:
            self._stats['uptime_start'] = None

    def get_stats(self) -> Dict[str, Any]:
        """Get a shallow copy of the current statistics."""
        return self._stats.copy()

    def update_stat(self, key: str, value: Any) -> None:
        """Update a specific statistic."""
        self._stats[key] = value

    def increment_stat(self, key: str, amount: int = 1) -> None:
        """Increment a numeric statistic.

        If the key is absent or non-numeric, it is (re)set to ``amount``.
        """
        if key in self._stats and isinstance(self._stats[key], (int, float)):
            self._stats[key] += amount
        else:
            self._stats[key] = amount

    async def start_cache_updates(self) -> None:
        """Start the background cache update task.

        No-op (with a warning) if an update task is already running.
        NOTE(review): the loop exits immediately unless set_running_state(True)
        was called first — callers are expected to do that; confirm ordering.
        """
        if self._cache_update_task and not self._cache_update_task.done():
            if self.logger_manager:
                self.logger_manager.log_warning("Cache updates are already running")
            return

        if self.logger_manager:
            self.logger_manager.log_debug("Starting statistics cache updates")
        self._cache_update_task = asyncio.create_task(self._cache_update_loop())

    async def stop_cache_updates(self) -> None:
        """Stop the background cache update task and await its cancellation."""
        if self._cache_update_task and not self._cache_update_task.done():
            self._cache_update_task.cancel()
            try:
                await self._cache_update_task
            except asyncio.CancelledError:
                pass
            # Drop the finished task so a later start_cache_updates()
            # sees a clean slate instead of a stale, cancelled task.
            self._cache_update_task = None
            if self.logger_manager:
                self.logger_manager.log_debug("Statistics cache updates stopped")

    async def _cache_update_loop(self) -> None:
        """Background loop for updating cached statistics.

        Sleeps one interval before the first refresh; errors are logged and
        followed by an extra sleep so a persistent failure cannot spin.
        """
        while self._running:
            try:
                await asyncio.sleep(self.cache_update_interval)
                await self._update_cached_status()
            except asyncio.CancelledError:
                if self.logger_manager:
                    self.logger_manager.log_debug("Statistics cache update loop cancelled")
                break
            except Exception as e:
                if self.logger_manager:
                    self.logger_manager.log_error(f"Error in statistics cache update: {e}", exc_info=True)
                await asyncio.sleep(self.cache_update_interval)

    async def _update_cached_status(self) -> None:
        """Update the cached status information.

        Pulls fresh stats from the lifecycle manager and health monitor
        (merged into ``_stats``), rebuilds the status dict, and stamps the
        cache time. Failures are logged; the previous cache stays in place.
        """
        try:
            # Update basic stats from lifecycle manager
            if self.lifecycle_manager:
                lifecycle_stats = self.lifecycle_manager.get_stats()
                self._stats.update(lifecycle_stats)

            # Update health stats from health monitor
            if self.health_monitor:
                health_stats = self.health_monitor.get_health_stats()
                self._stats.update(health_stats)

            # Calculate uptime
            uptime_seconds = None
            if self._stats['uptime_start']:
                uptime_seconds = (datetime.now(timezone.utc) - self._stats['uptime_start']).total_seconds()

            # Build cached status
            self._cached_status = self._build_status_dict(uptime_seconds)
            self._cache_last_updated = datetime.now(timezone.utc)

            if self.logger_manager:
                self.logger_manager.log_debug("Statistics cache updated")
        except Exception as e:
            if self.logger_manager:
                self.logger_manager.log_error(f"Failed to update statistics cache: {e}", exc_info=True)

    def _build_status_dict(self, uptime_seconds: Optional[float]) -> Dict[str, Any]:
        """Build the complete status dictionary.

        Args:
            uptime_seconds: Precomputed uptime, or None when not running.

        Returns:
            Dict with uptime, a stats snapshot, per-collector status entries,
            enabled-collector names, collector count, and cache timestamp.
        """
        # Get individual collector statuses
        collector_statuses: Dict[str, Any] = {}
        enabled_collectors = []
        if self.lifecycle_manager:
            collectors = self.lifecycle_manager.get_collectors()
            # Fetched once and reused for both membership checks and output.
            enabled_collectors = self.lifecycle_manager.get_enabled_collectors()
            for name, collector in collectors.items():
                collector_statuses[name] = {
                    'status': collector.status.value,
                    'enabled': name in enabled_collectors,
                    'health': collector.get_health_status()
                }

        return {
            'uptime_seconds': uptime_seconds,
            'statistics': self._stats.copy(),
            'collectors': collector_statuses,
            'enabled_collectors': list(enabled_collectors),
            'total_collectors': len(collector_statuses),
            'cache_last_updated': self._cache_last_updated.isoformat() if self._cache_last_updated else None
        }

    def get_status(self, force_refresh: bool = False) -> Dict[str, Any]:
        """
        Get manager status and statistics.

        Args:
            force_refresh: If True, bypass cache and calculate real-time

        Returns:
            Dictionary containing status information
        """
        # Return cached status if available and not forcing refresh
        if not force_refresh and self._cached_status and self._cache_last_updated:
            # Check if cache is recent enough (within 2x the update interval)
            cache_age = (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
            if cache_age <= (self.cache_update_interval * 2):
                self._cache_hit_count += 1
                self._log_cache_performance_if_needed()
                # Shallow copy: nested dicts are still shared with the cache.
                return self._cached_status.copy()

        # Cache miss - increment counter
        self._cache_miss_count += 1

        # Calculate real-time status
        uptime_seconds = None
        if self._stats['uptime_start']:
            uptime_seconds = (datetime.now(timezone.utc) - self._stats['uptime_start']).total_seconds()
        return self._build_status_dict(uptime_seconds)

    def get_collector_status(self, collector_name: str) -> Optional[Dict[str, Any]]:
        """
        Get status for a specific collector.

        Args:
            collector_name: Name of the collector

        Returns:
            Collector status dict or None if not found
        """
        if not self.lifecycle_manager:
            return None

        collector = self.lifecycle_manager.get_collector(collector_name)
        if not collector:
            return None

        config = self.lifecycle_manager.get_collector_config(collector_name)
        return {
            'name': collector_name,
            'config': config.__dict__ if config else {},
            'status': collector.get_status(),
            'health': collector.get_health_status()
        }

    def list_collectors(self) -> List[str]:
        """
        List all managed collector names.

        Returns:
            List of collector names
        """
        if self.lifecycle_manager:
            return list(self.lifecycle_manager.get_collectors().keys())
        return []

    def get_running_collectors(self) -> List[str]:
        """
        Get names of currently running collectors.

        Returns:
            List of running collector names
        """
        running = []
        if self.lifecycle_manager:
            collectors = self.lifecycle_manager.get_collectors()
            for name, collector in collectors.items():
                if collector.status == CollectorStatus.RUNNING:
                    running.append(name)
        return running

    def get_failed_collectors(self) -> List[str]:
        """
        Get names of failed or unhealthy collectors.

        Returns:
            List of failed collector names
        """
        failed = []
        if self.lifecycle_manager:
            collectors = self.lifecycle_manager.get_collectors()
            for name, collector in collectors.items():
                # Health dict is produced by the collector; assumes an
                # 'is_healthy' bool key — TODO confirm against BaseDataCollector.
                health_status = collector.get_health_status()
                if not health_status['is_healthy']:
                    failed.append(name)
        return failed

    def force_cache_refresh(self) -> None:
        """Force an immediate cache refresh.

        Schedules the refresh as a fire-and-forget task; a strong reference
        is retained until completion so the task cannot be garbage-collected
        before it runs. No-op when the tracker is not running.
        """
        if self._running:
            task = asyncio.create_task(self._update_cached_status())
            self._pending_refresh_tasks.add(task)
            task.add_done_callback(self._pending_refresh_tasks.discard)

    def get_cache_info(self) -> Dict[str, Any]:
        """Get information about the cache state.

        Returns:
            Dict with cache settings, freshness (timestamp and age), and
            hit/miss counters with the derived hit-rate percentage.
        """
        total_requests = self._cache_hit_count + self._cache_miss_count
        hit_rate = (self._cache_hit_count / total_requests * 100) if total_requests > 0 else 0

        return {
            'cache_enabled': True,
            'cache_update_interval': self.cache_update_interval,
            'cache_last_updated': self._cache_last_updated.isoformat() if self._cache_last_updated else None,
            'cache_age_seconds': (
                (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
                if self._cache_last_updated else None
            ),
            'cache_hit_count': self._cache_hit_count,
            'cache_miss_count': self._cache_miss_count,
            'cache_hit_rate_percent': round(hit_rate, 2),
            'total_cache_requests': total_requests
        }

    def _log_cache_performance_if_needed(self) -> None:
        """Log cache performance metrics periodically (at most every 5 min)."""
        current_time = datetime.now(timezone.utc)

        # Log every 5 minutes
        if (current_time - self._last_performance_log).total_seconds() >= 300:
            total_requests = self._cache_hit_count + self._cache_miss_count
            if total_requests > 0:
                hit_rate = (self._cache_hit_count / total_requests * 100)
                if self.logger_manager:
                    self.logger_manager.log_debug(
                        f"Cache performance: {hit_rate:.1f}% hit rate "
                        f"({self._cache_hit_count} hits, {self._cache_miss_count} misses)"
                    )
            self._last_performance_log = current_time