TCPDashboard/data/manager_components/manager_stats_tracker.py
Vasily.onl f6cb1485b1 Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00

310 lines
12 KiB
Python

"""
Manager Statistics Tracker for managing collector statistics and caching.
This module handles statistics collection, caching, and periodic updates
to optimize performance by avoiding real-time calculations on every status request.
"""
import asyncio
from datetime import datetime, timezone
from typing import Dict, Any, Optional, List
from ..collector.base_collector import BaseDataCollector, CollectorStatus
class ManagerStatsTracker:
    """Manages statistics tracking and caching for the collector manager.

    Periodically snapshots statistics from the lifecycle manager and health
    monitor into a cached status dict so frequent status requests do not pay
    for real-time recalculation. All collaborators are optional; every access
    is guarded, so the tracker degrades to empty/zeroed results without them.
    """

    def __init__(self,
                 cache_update_interval: float = 30.0,
                 logger_manager=None,
                 lifecycle_manager=None,
                 health_monitor=None):
        """
        Initialize the statistics tracker.

        Args:
            cache_update_interval: Seconds between cache updates
            logger_manager: Logger manager instance for logging operations
            lifecycle_manager: Lifecycle manager for accessing collectors
            health_monitor: Health monitor for accessing health stats
        """
        self.cache_update_interval = cache_update_interval
        self.logger_manager = logger_manager
        self.lifecycle_manager = lifecycle_manager
        self.health_monitor = health_monitor

        # Statistics storage. Keys may be overwritten wholesale by
        # lifecycle/health stats in _update_cached_status.
        self._stats: Dict[str, Any] = {
            'total_collectors': 0,
            'running_collectors': 0,
            'failed_collectors': 0,
            'restarts_performed': 0,
            'last_global_check': None,
            'uptime_start': None
        }

        # Cache management
        self._cached_status: Optional[Dict[str, Any]] = None
        self._cache_last_updated: Optional[datetime] = None
        self._cache_update_task: Optional[asyncio.Task] = None
        self._running = False

        # Strong references to fire-and-forget refresh tasks created by
        # force_cache_refresh(). The event loop keeps only a weak reference
        # to tasks, so without this set a pending refresh could be
        # garbage-collected before it runs (see asyncio.create_task docs).
        self._pending_refresh_tasks: set = set()

        # Performance tracking for cache optimization
        self._cache_hit_count = 0
        self._cache_miss_count = 0
        self._last_performance_log = datetime.now(timezone.utc)

    def set_running_state(self, running: bool) -> None:
        """Set the running state of the tracker.

        Starting stamps ``uptime_start``; stopping clears it, which also
        zeroes out reported uptime.
        """
        self._running = running
        if running:
            self._stats['uptime_start'] = datetime.now(timezone.utc)
        else:
            self._stats['uptime_start'] = None

    def get_stats(self) -> Dict[str, Any]:
        """Get a shallow copy of the current statistics."""
        return self._stats.copy()

    def update_stat(self, key: str, value: Any) -> None:
        """Update a specific statistic."""
        self._stats[key] = value

    def increment_stat(self, key: str, amount: int = 1) -> None:
        """Increment a numeric statistic.

        If the key is absent or non-numeric, it is (re)set to ``amount``.
        """
        if key in self._stats and isinstance(self._stats[key], (int, float)):
            self._stats[key] += amount
        else:
            self._stats[key] = amount

    async def start_cache_updates(self) -> None:
        """Start the background cache update task.

        No-op (with a warning) if an update task is already running.
        NOTE(review): the loop exits immediately unless set_running_state(True)
        was called first — callers are expected to do that; confirm ordering.
        """
        if self._cache_update_task and not self._cache_update_task.done():
            if self.logger_manager:
                self.logger_manager.log_warning("Cache updates are already running")
            return

        if self.logger_manager:
            self.logger_manager.log_debug("Starting statistics cache updates")
        self._cache_update_task = asyncio.create_task(self._cache_update_loop())

    async def stop_cache_updates(self) -> None:
        """Stop the background cache update task and await its cancellation."""
        if self._cache_update_task and not self._cache_update_task.done():
            self._cache_update_task.cancel()
            try:
                await self._cache_update_task
            except asyncio.CancelledError:
                pass
            # Drop the finished task so a later start_cache_updates()
            # sees a clean slate instead of a stale, cancelled task.
            self._cache_update_task = None
            if self.logger_manager:
                self.logger_manager.log_debug("Statistics cache updates stopped")

    async def _cache_update_loop(self) -> None:
        """Background loop for updating cached statistics.

        Sleeps one interval before the first refresh; errors are logged and
        followed by an extra sleep so a persistent failure cannot spin.
        """
        while self._running:
            try:
                await asyncio.sleep(self.cache_update_interval)
                await self._update_cached_status()
            except asyncio.CancelledError:
                if self.logger_manager:
                    self.logger_manager.log_debug("Statistics cache update loop cancelled")
                break
            except Exception as e:
                if self.logger_manager:
                    self.logger_manager.log_error(f"Error in statistics cache update: {e}", exc_info=True)
                await asyncio.sleep(self.cache_update_interval)

    async def _update_cached_status(self) -> None:
        """Update the cached status information.

        Pulls fresh stats from the lifecycle manager and health monitor
        (merged into ``_stats``), rebuilds the status dict, and stamps the
        cache time. Failures are logged; the previous cache stays in place.
        """
        try:
            # Update basic stats from lifecycle manager
            if self.lifecycle_manager:
                lifecycle_stats = self.lifecycle_manager.get_stats()
                self._stats.update(lifecycle_stats)

            # Update health stats from health monitor
            if self.health_monitor:
                health_stats = self.health_monitor.get_health_stats()
                self._stats.update(health_stats)

            # Calculate uptime
            uptime_seconds = None
            if self._stats['uptime_start']:
                uptime_seconds = (datetime.now(timezone.utc) - self._stats['uptime_start']).total_seconds()

            # Build cached status
            self._cached_status = self._build_status_dict(uptime_seconds)
            self._cache_last_updated = datetime.now(timezone.utc)

            if self.logger_manager:
                self.logger_manager.log_debug("Statistics cache updated")
        except Exception as e:
            if self.logger_manager:
                self.logger_manager.log_error(f"Failed to update statistics cache: {e}", exc_info=True)

    def _build_status_dict(self, uptime_seconds: Optional[float]) -> Dict[str, Any]:
        """Build the complete status dictionary.

        Args:
            uptime_seconds: Precomputed uptime, or None when not running.

        Returns:
            Dict with uptime, a stats snapshot, per-collector status entries,
            enabled-collector names, collector count, and cache timestamp.
        """
        # Get individual collector statuses
        collector_statuses: Dict[str, Any] = {}
        enabled_collectors = []
        if self.lifecycle_manager:
            collectors = self.lifecycle_manager.get_collectors()
            # Fetched once and reused for both membership checks and output.
            enabled_collectors = self.lifecycle_manager.get_enabled_collectors()
            for name, collector in collectors.items():
                collector_statuses[name] = {
                    'status': collector.status.value,
                    'enabled': name in enabled_collectors,
                    'health': collector.get_health_status()
                }

        return {
            'uptime_seconds': uptime_seconds,
            'statistics': self._stats.copy(),
            'collectors': collector_statuses,
            'enabled_collectors': list(enabled_collectors),
            'total_collectors': len(collector_statuses),
            'cache_last_updated': self._cache_last_updated.isoformat() if self._cache_last_updated else None
        }

    def get_status(self, force_refresh: bool = False) -> Dict[str, Any]:
        """
        Get manager status and statistics.

        Args:
            force_refresh: If True, bypass cache and calculate real-time

        Returns:
            Dictionary containing status information
        """
        # Return cached status if available and not forcing refresh
        if not force_refresh and self._cached_status and self._cache_last_updated:
            # Check if cache is recent enough (within 2x the update interval)
            cache_age = (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
            if cache_age <= (self.cache_update_interval * 2):
                self._cache_hit_count += 1
                self._log_cache_performance_if_needed()
                # Shallow copy: nested dicts are still shared with the cache.
                return self._cached_status.copy()

        # Cache miss - increment counter
        self._cache_miss_count += 1

        # Calculate real-time status
        uptime_seconds = None
        if self._stats['uptime_start']:
            uptime_seconds = (datetime.now(timezone.utc) - self._stats['uptime_start']).total_seconds()
        return self._build_status_dict(uptime_seconds)

    def get_collector_status(self, collector_name: str) -> Optional[Dict[str, Any]]:
        """
        Get status for a specific collector.

        Args:
            collector_name: Name of the collector

        Returns:
            Collector status dict or None if not found
        """
        if not self.lifecycle_manager:
            return None

        collector = self.lifecycle_manager.get_collector(collector_name)
        if not collector:
            return None

        config = self.lifecycle_manager.get_collector_config(collector_name)
        return {
            'name': collector_name,
            'config': config.__dict__ if config else {},
            'status': collector.get_status(),
            'health': collector.get_health_status()
        }

    def list_collectors(self) -> List[str]:
        """
        List all managed collector names.

        Returns:
            List of collector names
        """
        if self.lifecycle_manager:
            return list(self.lifecycle_manager.get_collectors().keys())
        return []

    def get_running_collectors(self) -> List[str]:
        """
        Get names of currently running collectors.

        Returns:
            List of running collector names
        """
        running = []
        if self.lifecycle_manager:
            collectors = self.lifecycle_manager.get_collectors()
            for name, collector in collectors.items():
                if collector.status == CollectorStatus.RUNNING:
                    running.append(name)
        return running

    def get_failed_collectors(self) -> List[str]:
        """
        Get names of failed or unhealthy collectors.

        Returns:
            List of failed collector names
        """
        failed = []
        if self.lifecycle_manager:
            collectors = self.lifecycle_manager.get_collectors()
            for name, collector in collectors.items():
                # Health dict is produced by the collector; assumes an
                # 'is_healthy' bool key — TODO confirm against BaseDataCollector.
                health_status = collector.get_health_status()
                if not health_status['is_healthy']:
                    failed.append(name)
        return failed

    def force_cache_refresh(self) -> None:
        """Force an immediate cache refresh.

        Schedules the refresh as a fire-and-forget task; a strong reference
        is retained until completion so the task cannot be garbage-collected
        before it runs. No-op when the tracker is not running.
        """
        if self._running:
            task = asyncio.create_task(self._update_cached_status())
            self._pending_refresh_tasks.add(task)
            task.add_done_callback(self._pending_refresh_tasks.discard)

    def get_cache_info(self) -> Dict[str, Any]:
        """Get information about the cache state.

        Returns:
            Dict with cache settings, freshness (timestamp and age), and
            hit/miss counters with the derived hit-rate percentage.
        """
        total_requests = self._cache_hit_count + self._cache_miss_count
        hit_rate = (self._cache_hit_count / total_requests * 100) if total_requests > 0 else 0

        return {
            'cache_enabled': True,
            'cache_update_interval': self.cache_update_interval,
            'cache_last_updated': self._cache_last_updated.isoformat() if self._cache_last_updated else None,
            'cache_age_seconds': (
                (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
                if self._cache_last_updated else None
            ),
            'cache_hit_count': self._cache_hit_count,
            'cache_miss_count': self._cache_miss_count,
            'cache_hit_rate_percent': round(hit_rate, 2),
            'total_cache_requests': total_requests
        }

    def _log_cache_performance_if_needed(self) -> None:
        """Log cache performance metrics periodically (at most every 5 min)."""
        current_time = datetime.now(timezone.utc)

        # Log every 5 minutes
        if (current_time - self._last_performance_log).total_seconds() >= 300:
            total_requests = self._cache_hit_count + self._cache_miss_count
            if total_requests > 0:
                hit_rate = (self._cache_hit_count / total_requests * 100)
                if self.logger_manager:
                    self.logger_manager.log_debug(
                        f"Cache performance: {hit_rate:.1f}% hit rate "
                        f"({self._cache_hit_count} hits, {self._cache_miss_count} misses)"
                    )
            self._last_performance_log = current_time