Implement data collection architecture with modular components

- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
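- Updated imports across the codebase to reflect the new package structure (e.g. `..base_collector` is now `..collector.base_collector`, and `..collector_types` is now `..collector.collector_types`), ensuring consistent access to components.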

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
This commit is contained in:
Vasily.onl
2025-06-10 13:40:28 +08:00
parent c28e4a9aaf
commit f6cb1485b1
18 changed files with 384 additions and 45 deletions

View File

@@ -8,8 +8,8 @@ enabling, disabling, starting, and restarting collectors.
import asyncio
import time
from typing import Dict, Set, Optional
from ..base_collector import BaseDataCollector, CollectorStatus
from ..collector_types import CollectorConfig
from ..collector.base_collector import BaseDataCollector, CollectorStatus
from ..collector.collector_types import CollectorConfig
class CollectorLifecycleManager:

View File

@@ -8,7 +8,7 @@ auto-restart functionality, and health status tracking.
import asyncio
from datetime import datetime, timezone
from typing import Set, Dict, Optional
from ..base_collector import BaseDataCollector, CollectorStatus
from ..collector.base_collector import BaseDataCollector, CollectorStatus
class ManagerHealthMonitor:

View File

@@ -8,7 +8,7 @@ to optimize performance by avoiding real-time calculations on every status reque
import asyncio
from datetime import datetime, timezone
from typing import Dict, Any, Optional, List
from ..base_collector import BaseDataCollector, CollectorStatus
from ..collector.base_collector import BaseDataCollector, CollectorStatus
class ManagerStatsTracker:
@@ -48,6 +48,11 @@ class ManagerStatsTracker:
self._cache_last_updated: Optional[datetime] = None
self._cache_update_task: Optional[asyncio.Task] = None
self._running = False
# Performance tracking for cache optimization
self._cache_hit_count = 0
self._cache_miss_count = 0
self._last_performance_log = datetime.now(timezone.utc)
def set_running_state(self, running: bool) -> None:
"""Set the running state of the tracker."""
@@ -180,8 +185,13 @@ class ManagerStatsTracker:
# Check if cache is recent enough (within 2x the update interval)
cache_age = (datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
if cache_age <= (self.cache_update_interval * 2):
self._cache_hit_count += 1
self._log_cache_performance_if_needed()
return self._cached_status.copy()
# Cache miss - increment counter
self._cache_miss_count += 1
# Calculate real-time status
uptime_seconds = None
if self._stats['uptime_start']:
@@ -264,6 +274,9 @@ class ManagerStatsTracker:
def get_cache_info(self) -> Dict[str, Any]:
"""Get information about the cache state."""
total_requests = self._cache_hit_count + self._cache_miss_count
hit_rate = (self._cache_hit_count / total_requests * 100) if total_requests > 0 else 0
return {
'cache_enabled': True,
'cache_update_interval': self.cache_update_interval,
@@ -271,5 +284,27 @@ class ManagerStatsTracker:
'cache_age_seconds': (
(datetime.now(timezone.utc) - self._cache_last_updated).total_seconds()
if self._cache_last_updated else None
)
}
),
'cache_hit_count': self._cache_hit_count,
'cache_miss_count': self._cache_miss_count,
'cache_hit_rate_percent': round(hit_rate, 2),
'total_cache_requests': total_requests
}
def _log_cache_performance_if_needed(self) -> None:
"""Log cache performance metrics periodically."""
current_time = datetime.now(timezone.utc)
# Log every 5 minutes
if (current_time - self._last_performance_log).total_seconds() >= 300:
total_requests = self._cache_hit_count + self._cache_miss_count
if total_requests > 0:
hit_rate = (self._cache_hit_count / total_requests * 100)
if self.logger_manager:
self.logger_manager.log_debug(
f"Cache performance: {hit_rate:.1f}% hit rate "
f"({self._cache_hit_count} hits, {self._cache_miss_count} misses)"
)
self._last_performance_log = current_time