- Introduced a new `system_health_constants.py` file to define thresholds and constants for system health metrics.
- Refactored existing system health callbacks into modular components, enhancing maintainability and clarity.
- Implemented dynamic loading of time range options in `charts.py`, improving flexibility in time range selection.
- Added detailed documentation for new callback functions, ensuring clarity on their purpose and usage.
- Enhanced error handling and logging practices across the new modules to ensure robust monitoring and debugging capabilities.

These changes significantly improve the architecture and maintainability of the system health monitoring features, aligning with project standards for modularity and performance.
120 lines
4.7 KiB
Python
120 lines
4.7 KiB
Python
import asyncio
|
|
import json
|
|
import subprocess
|
|
import psutil
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, Any, Optional, List
|
|
from dash import Output, Input, State, html, callback_context, no_update
|
|
import dash_bootstrap_components as dbc
|
|
from utils.logger import get_logger
|
|
from database.connection import DatabaseManager
|
|
from database.redis_manager import get_sync_redis_manager
|
|
|
|
from config.constants.system_health_constants import (
|
|
CPU_GOOD_THRESHOLD, CPU_WARNING_THRESHOLD,
|
|
MEMORY_GOOD_THRESHOLD, MEMORY_WARNING_THRESHOLD
|
|
)
|
|
|
|
# Module-level logger used by the status helpers below; obtained from the
# project's get_logger helper under the name "default_logger".
logger = get_logger("default_logger")
|
|
|
|
|
|
def register_quick_status_callbacks(app):
    """Attach the quick-status callback (top summary cards) to ``app``.

    Args:
        app: Object exposing a ``callback`` decorator; the inner callback
            is registered on it as a side effect. Nothing is returned.
    """

    @app.callback(
        [
            Output('data-collection-quick-status', 'children'),
            Output('database-quick-status', 'children'),
            Output('redis-quick-status', 'children'),
            Output('performance-quick-status', 'children'),
        ],
        Input('interval-component', 'n_intervals'),
    )
    def update_quick_status(n_intervals):
        """Return the four status badges in the order of the outputs.

        ``n_intervals`` only triggers the refresh; its value is not read.
        """
        try:
            # Tuple elements are evaluated left to right, so the probes run
            # in the same order as the outputs they feed.
            return (
                _get_data_collection_quick_status(),
                _get_database_quick_status(),
                _get_redis_quick_status(),
                _get_performance_quick_status(),
            )
        except Exception as exc:
            logger.error(f"Error updating quick status: {exc}")
            failure_badge = dbc.Badge("🔴 Error", color="danger", className="me-1")
            # The same badge object is reused for all four outputs.
            return (failure_badge,) * 4
|
|
|
|
|
|
def _get_data_collection_quick_status() -> dbc.Badge:
    """Summarise the data collection service state as a badge.

    Returns:
        A green "Active" badge when the service check is truthy, a red
        "Stopped" badge when it is not, or a yellow "Unknown" badge if
        anything raises while building the result.
    """
    try:
        if _check_data_collection_service_running():
            label, color = "Active", "success"
        else:
            label, color = "Stopped", "danger"
        return dbc.Badge(label, color=color, className="me-1")
    except Exception as exc:
        logger.error(f"Error checking data collection quick status: {exc}")
        return dbc.Badge("Unknown", color="warning", className="me-1")
|
|
|
|
|
|
def _get_database_quick_status() -> dbc.Badge:
    """Probe the database and summarise the result as a badge.

    Returns:
        A green "Connected" badge when ``test_connection()`` is truthy; a
        falsy result or any exception yields a red "Error" badge.
    """
    try:
        # NOTE(review): a new DatabaseManager is created and initialized on
        # every call -- confirm this is cheap or pooled on the manager side.
        manager = DatabaseManager()
        manager.initialize()
        connected = manager.test_connection()
        if not connected:
            return dbc.Badge("Error", color="danger", className="me-1")
        return dbc.Badge("Connected", color="success", className="me-1")
    except Exception as exc:
        logger.error(f"Error checking database quick status: {exc}")
        return dbc.Badge("Error", color="danger", className="me-1")
|
|
|
|
|
|
def _get_redis_quick_status() -> dbc.Badge:
    """Ping Redis and summarise the result as a badge.

    Returns:
        A green "Connected" badge when ``client.ping()`` is truthy; a falsy
        reply or any exception yields a red "Error" badge.
    """
    try:
        manager = get_sync_redis_manager()
        manager.initialize()
        if manager.client.ping():
            label, color = "Connected", "success"
        else:
            label, color = "Error", "danger"
        return dbc.Badge(label, color=color, className="me-1")
    except Exception as exc:
        logger.error(f"Redis quick status check failed: {exc}")
        return dbc.Badge("Error", color="danger", className="me-1")
|
|
|
|
|
|
def _get_performance_quick_status() -> dbc.Badge:
    """Classify current CPU and memory usage as a badge.

    Both readings must be under their "good" thresholds for "Good", and
    both under their "warning" thresholds for "Warning"; anything else is
    "High". Any exception yields a grey "Unknown" badge.
    """
    try:
        # Samples CPU usage over a 0.1 s window, so this call blocks briefly.
        cpu_load = psutil.cpu_percent(interval=0.1)
        mem_load = psutil.virtual_memory().percent

        if cpu_load < CPU_GOOD_THRESHOLD and mem_load < MEMORY_GOOD_THRESHOLD:
            label, color = "Good", "success"
        elif cpu_load < CPU_WARNING_THRESHOLD and mem_load < MEMORY_WARNING_THRESHOLD:
            label, color = "Warning", "warning"
        else:
            label, color = "High", "danger"
        return dbc.Badge(label, color=color, className="me-1")
    except Exception as exc:
        logger.error(f"Error checking performance quick status: {exc}")
        return dbc.Badge("Unknown", color="secondary", className="me-1")
|
|
|
|
|
|
def _check_data_collection_service_running() -> bool:
    """Report whether any process command line names the collection service.

    A process matches when its space-joined command line contains
    'start_data_collection.py' or 'collection_service'.

    Returns:
        True on the first match; False when nothing matches or when the
        scan itself fails (the failure is logged).
    """
    service_markers = ('start_data_collection.py', 'collection_service')
    try:
        for process in psutil.process_iter(['pid', 'name', 'cmdline']):
            try:
                argv = process.info['cmdline']
                if not argv:
                    # No command line available for this process; skip it.
                    continue
                command = ' '.join(argv)
                if any(marker in command for marker in service_markers):
                    return True
            except (psutil.NoSuchProcess, psutil.AccessDenied) as exc:
                # Log and move on to the next process.
                logger.warning(f"Access or process error checking service: {exc}")
        return False
    except Exception as exc:
        logger.error(f"Error checking data collection service running status: {exc}")
        return False