""" Enhanced system health callbacks for the dashboard. """ import asyncio import json import subprocess import psutil from datetime import datetime, timedelta from typing import Dict, Any, Optional, List from dash import Output, Input, State, html, callback_context, no_update import dash_mantine_components as dmc from utils.logger import get_logger from database.connection import DatabaseManager from database.redis_manager import RedisManager logger = get_logger("system_health_callbacks") def register_system_health_callbacks(app): """Register enhanced system health callbacks with comprehensive monitoring.""" # Quick Status Updates (Top Cards) @app.callback( [Output('data-collection-quick-status', 'children'), Output('database-quick-status', 'children'), Output('redis-quick-status', 'children'), Output('performance-quick-status', 'children')], Input('interval-component', 'n_intervals') ) def update_quick_status(n_intervals): """Update quick status indicators.""" try: # Data Collection Status dc_status = _get_data_collection_quick_status() # Database Status db_status = _get_database_quick_status() # Redis Status redis_status = _get_redis_quick_status() # Performance Status perf_status = _get_performance_quick_status() return dc_status, db_status, redis_status, perf_status except Exception as e: logger.error(f"Error updating quick status: {e}") error_status = dmc.Badge("🔴 Error", color="red", variant="light") return error_status, error_status, error_status, error_status # Detailed Data Collection Service Status @app.callback( [Output('data-collection-service-status', 'children'), Output('data-collection-metrics', 'children')], [Input('interval-component', 'n_intervals'), Input('refresh-data-status-btn', 'n_clicks')] ) def update_data_collection_status(n_intervals, refresh_clicks): """Update detailed data collection service status and metrics.""" try: service_status = _get_data_collection_service_status() metrics = _get_data_collection_metrics() return service_status, metrics except Exception as e: logger.error(f"Error updating data collection status: {e}") error_div = dmc.Alert( f"Error: {str(e)}", title="🔴 Status Check Failed", color="red", variant="light" ) return error_div, error_div # Individual Collectors Status @app.callback( Output('individual-collectors-status', 'children'), [Input('interval-component', 'n_intervals'), Input('refresh-data-status-btn', 'n_clicks')] ) def update_individual_collectors_status(n_intervals, refresh_clicks): """Update individual data collector health status.""" try: return _get_individual_collectors_status() except Exception as e: logger.error(f"Error updating individual collectors status: {e}") return dmc.Alert( f"Error: {str(e)}", title="🔴 Collectors Check Failed", color="red", variant="light" ) # Database Status and Statistics @app.callback( [Output('database-status', 'children'), Output('database-stats', 'children')], Input('interval-component', 'n_intervals') ) def update_database_status(n_intervals): """Update database connection status and statistics.""" try: db_status = _get_database_status() db_stats = _get_database_statistics() return db_status, db_stats except Exception as e: logger.error(f"Error updating database status: {e}") error_alert = dmc.Alert( f"Error: {str(e)}", title="🔴 Database Check Failed", color="red", variant="light" ) return error_alert, error_alert # Redis Status and Statistics @app.callback( [Output('redis-status', 'children'), Output('redis-stats', 'children')], Input('interval-component', 'n_intervals') ) def update_redis_status(n_intervals): """Update Redis connection status and statistics.""" try: redis_status = _get_redis_status() redis_stats = _get_redis_statistics() return redis_status, redis_stats except Exception as e: logger.error(f"Error updating Redis status: {e}") error_alert = dmc.Alert( f"Error: {str(e)}", title="🔴 Redis Check Failed", color="red", variant="light" ) return error_alert, error_alert # System Performance Metrics @app.callback( Output('system-performance-metrics', 'children'), Input('interval-component', 'n_intervals') ) def update_system_performance(n_intervals): """Update system performance metrics.""" try: return _get_system_performance_metrics() except Exception as e: logger.error(f"Error updating system performance: {e}") return dmc.Alert( f"Error: {str(e)}", title="🔴 Performance Check Failed", color="red", variant="light" ) # Data Collection Details Modal @app.callback( [Output("collection-details-modal", "opened"), Output("collection-details-content", "children")], [Input("view-collection-details-btn", "n_clicks")], State("collection-details-modal", "opened") ) def toggle_collection_details_modal(details_clicks, is_open): """Toggle and populate the collection details modal.""" if details_clicks: # Load detailed collection information details_content = _get_collection_details_content() return True, details_content return is_open, no_update # Collection Logs Modal @app.callback( [Output("collection-logs-modal", "opened"), Output("collection-logs-content", "children")], [Input("view-collection-logs-btn", "n_clicks"), Input("refresh-logs-btn", "n_clicks"), Input("close-logs-modal", "n_clicks")], State("collection-logs-modal", "opened") ) def toggle_collection_logs_modal(logs_clicks, refresh_clicks, close_clicks, is_open): """Toggle and populate the collection logs modal.""" if logs_clicks or refresh_clicks: # Load recent logs logs_content = _get_collection_logs_content() return True, logs_content elif close_clicks: return False, no_update return is_open, no_update logger.info("Enhanced system health callbacks registered successfully") # Helper Functions def _get_data_collection_quick_status() -> dmc.Badge: """Get quick data collection status.""" try: # Check if data collection service is running (simplified check) is_running = _check_data_collection_service_running() if is_running: return dmc.Badge("🟢 Active", color="green", variant="light") else: return dmc.Badge("🔴 Stopped", color="red", variant="light") except: return dmc.Badge("🟡 Unknown", color="yellow", variant="light") def _get_database_quick_status() -> dmc.Badge: """Get quick database status.""" try: db_manager = DatabaseManager() db_manager.initialize() # Initialize the database manager result = db_manager.test_connection() if result: return dmc.Badge("🟢 Connected", color="green", variant="light") else: return dmc.Badge("🔴 Error", color="red", variant="light") except: return dmc.Badge("🔴 Error", color="red", variant="light") def _get_redis_quick_status() -> dmc.Badge: """Get quick Redis status.""" try: redis_manager = RedisManager() redis_manager.initialize() # Initialize the Redis manager result = redis_manager.test_connection() if result: return dmc.Badge("🟢 Connected", color="green", variant="light") else: return dmc.Badge("🔴 Error", color="red", variant="light") except: return dmc.Badge("🔴 Error", color="red", variant="light") def _get_performance_quick_status() -> dmc.Badge: """Get quick performance status.""" try: cpu_percent = psutil.cpu_percent(interval=0.1) memory = psutil.virtual_memory() if cpu_percent < 80 and memory.percent < 80: return dmc.Badge("🟢 Good", color="green", variant="light") elif cpu_percent < 90 and memory.percent < 90: return dmc.Badge("🟡 Warning", color="yellow", variant="light") else: return dmc.Badge("🔴 High", color="red", variant="light") except: return dmc.Badge("❓ Unknown", color="gray", variant="light") def _get_data_collection_service_status() -> html.Div: """Get detailed data collection service status.""" try: is_running = _check_data_collection_service_running() current_time = datetime.now() if is_running: return dmc.Stack([ dmc.Group([ dmc.Badge("🟢 Service Running", color="green", variant="light"), dmc.Text(f"Checked: {current_time.strftime('%H:%M:%S')}", size="xs", c="dimmed") ], justify="space-between"), dmc.Text("Data collection service is actively collecting market data.", size="sm", c="#2c3e50") ], gap="xs") else: return dmc.Stack([ dmc.Group([ dmc.Badge("🔴 Service Stopped", color="red", variant="light"), dmc.Text(f"Checked: {current_time.strftime('%H:%M:%S')}", size="xs", c="dimmed") ], justify="space-between"), dmc.Text("Data collection service is not running.", size="sm", c="#e74c3c"), dmc.Code("python scripts/start_data_collection.py", style={'margin-top': '5px'}) ], gap="xs") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Status Check Failed", color="red", variant="light" ) def _get_data_collection_metrics() -> html.Div: """Get data collection metrics.""" try: # Get database statistics for collected data db_manager = DatabaseManager() db_manager.initialize() # Initialize the database manager with db_manager.get_session() as session: from sqlalchemy import text # Count OHLCV candles from market_data table candles_count = session.execute( text("SELECT COUNT(*) FROM market_data") ).scalar() or 0 # Count raw tickers from raw_trades table tickers_count = session.execute( text("SELECT COUNT(*) FROM raw_trades WHERE data_type = 'ticker'") ).scalar() or 0 # Get latest data timestamp from both tables latest_market_data = session.execute( text("SELECT MAX(timestamp) FROM market_data") ).scalar() latest_raw_data = session.execute( text("SELECT MAX(timestamp) FROM raw_trades") ).scalar() # Use the most recent timestamp latest_data = None if latest_market_data and latest_raw_data: latest_data = max(latest_market_data, latest_raw_data) elif latest_market_data: latest_data = latest_market_data elif latest_raw_data: latest_data = latest_raw_data # Calculate data freshness data_freshness_badge = dmc.Badge("No data", color="gray", variant="light") if latest_data: time_diff = datetime.utcnow() - latest_data.replace(tzinfo=None) if latest_data.tzinfo else datetime.utcnow() - latest_data if time_diff < timedelta(minutes=5): data_freshness_badge = dmc.Badge(f"🟢 Fresh ({time_diff.seconds // 60}m ago)", color="green", variant="light") elif time_diff < timedelta(hours=1): data_freshness_badge = dmc.Badge(f"🟡 Recent ({time_diff.seconds // 60}m ago)", color="yellow", variant="light") else: data_freshness_badge = dmc.Badge(f"🔴 Stale ({time_diff.total_seconds() // 3600:.1f}h ago)", color="red", variant="light") return dmc.Stack([ dmc.Group([ dmc.Text(f"Candles: {candles_count:,}", fw=500), dmc.Text(f"Tickers: {tickers_count:,}", fw=500) ], justify="space-between"), dmc.Group([ dmc.Text("Data Freshness:", fw=500), data_freshness_badge ], justify="space-between") ], gap="xs") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Metrics Unavailable", color="red", variant="light" ) def _get_individual_collectors_status() -> html.Div: """Get individual data collector status.""" try: # This would connect to a running data collection service # For now, show a placeholder indicating the status return dmc.Alert([ dmc.Text("Individual collector health data would be displayed here when the data collection service is running.", size="sm"), dmc.Space(h="sm"), dmc.Group([ dmc.Text("To start monitoring:", size="sm"), dmc.Code("python scripts/start_data_collection.py") ]) ], title="📊 Collector Health Monitoring", color="blue", variant="light") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Collector Status Check Failed", color="red", variant="light" ) def _get_database_status() -> html.Div: """Get detailed database status.""" try: db_manager = DatabaseManager() db_manager.initialize() # Initialize the database manager with db_manager.get_session() as session: # Test connection and get basic info from sqlalchemy import text result = session.execute(text("SELECT version()")).fetchone() version = result[0] if result else "Unknown" # Get connection count connections = session.execute( text("SELECT count(*) FROM pg_stat_activity") ).scalar() or 0 return dmc.Stack([ dmc.Group([ dmc.Badge("🟢 Database Connected", color="green", variant="light"), dmc.Text(f"Checked: {datetime.now().strftime('%H:%M:%S')}", size="xs", c="dimmed") ], justify="space-between"), dmc.Text(f"Version: PostgreSQL {version.split()[1] if 'PostgreSQL' in version else 'Unknown'}", size="xs", c="dimmed"), dmc.Text(f"Active connections: {connections}", size="xs", c="dimmed") ], gap="xs") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Database Connection Failed", color="red", variant="light" ) def _get_database_statistics() -> html.Div: """Get database statistics.""" try: db_manager = DatabaseManager() db_manager.initialize() # Initialize the database manager with db_manager.get_session() as session: # Get table sizes from sqlalchemy import text table_stats = session.execute(text(""" SELECT schemaname, tablename, pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size FROM pg_tables WHERE schemaname NOT IN ('information_schema', 'pg_catalog') ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC LIMIT 5 """)).fetchall() # Get recent activity from both main data tables market_data_activity = session.execute( text("SELECT COUNT(*) FROM market_data WHERE timestamp > NOW() - INTERVAL '1 hour'") ).scalar() or 0 raw_data_activity = session.execute( text("SELECT COUNT(*) FROM raw_trades WHERE timestamp > NOW() - INTERVAL '1 hour'") ).scalar() or 0 total_recent_activity = market_data_activity + raw_data_activity stats_components = [ dmc.Group([ dmc.Text("Recent Activity (1h):", fw=500), dmc.Text(f"{total_recent_activity:,} records", c="#2c3e50") ], justify="space-between"), dmc.Group([ dmc.Text("• Market Data:", fw=400), dmc.Text(f"{market_data_activity:,}", c="#7f8c8d") ], justify="space-between"), dmc.Group([ dmc.Text("• Raw Data:", fw=400), dmc.Text(f"{raw_data_activity:,}", c="#7f8c8d") ], justify="space-between") ] if table_stats: stats_components.append(dmc.Text("Largest Tables:", fw=500)) for schema, table, size in table_stats: stats_components.append( dmc.Text(f"• {table}: {size}", size="xs", c="dimmed", style={'margin-left': '10px'}) ) return dmc.Stack(stats_components, gap="xs") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Statistics Unavailable", color="red", variant="light" ) def _get_redis_status() -> html.Div: """Get Redis status.""" try: redis_manager = RedisManager() redis_manager.initialize() # Initialize the Redis manager info = redis_manager.get_info() return dmc.Stack([ dmc.Group([ dmc.Badge("🟢 Redis Connected", color="green", variant="light"), dmc.Text(f"Checked: {datetime.now().strftime('%H:%M:%S')}", size="xs", c="dimmed") ], justify="space-between"), dmc.Text(f"Host: {redis_manager.config.host}:{redis_manager.config.port}", size="xs", c="dimmed") ], gap="xs") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Redis Connection Failed", color="red", variant="light" ) def _get_redis_statistics() -> html.Div: """Get Redis statistics.""" try: redis_manager = RedisManager() redis_manager.initialize() # Initialize the Redis manager # Get Redis info info = redis_manager.get_info() return dmc.Stack([ dmc.Group([ dmc.Text("Memory Used:", fw=500), dmc.Text(f"{info.get('used_memory_human', 'Unknown')}", c="#2c3e50") ], justify="space-between"), dmc.Group([ dmc.Text("Connected Clients:", fw=500), dmc.Text(f"{info.get('connected_clients', 'Unknown')}", c="#2c3e50") ], justify="space-between"), dmc.Group([ dmc.Text("Uptime:", fw=500), dmc.Text(f"{info.get('uptime_in_seconds', 0) // 3600}h", c="#2c3e50") ], justify="space-between") ], gap="xs") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Statistics Unavailable", color="red", variant="light" ) def _get_system_performance_metrics() -> html.Div: """Get system performance metrics.""" try: # CPU usage cpu_percent = psutil.cpu_percent(interval=0.1) cpu_count = psutil.cpu_count() # Memory usage memory = psutil.virtual_memory() # Disk usage disk = psutil.disk_usage('/') # Network I/O (if available) try: network = psutil.net_io_counters() network_sent = f"{network.bytes_sent / (1024**3):.2f} GB" network_recv = f"{network.bytes_recv / (1024**3):.2f} GB" except: network_sent = "N/A" network_recv = "N/A" # Color coding for metrics cpu_color = "green" if cpu_percent < 70 else "yellow" if cpu_percent < 85 else "red" memory_color = "green" if memory.percent < 70 else "yellow" if memory.percent < 85 else "red" disk_color = "green" if disk.percent < 70 else "yellow" if disk.percent < 85 else "red" return dmc.Stack([ dmc.Group([ dmc.Text("CPU Usage:", fw=500), dmc.Badge(f"{cpu_percent:.1f}%", color=cpu_color, variant="light"), dmc.Text(f"({cpu_count} cores)", size="xs", c="dimmed") ], justify="space-between"), dmc.Group([ dmc.Text("Memory:", fw=500), dmc.Badge(f"{memory.percent:.1f}%", color=memory_color, variant="light"), dmc.Text(f"{memory.used // (1024**3)} GB / {memory.total // (1024**3)} GB", size="xs", c="dimmed") ], justify="space-between"), dmc.Group([ dmc.Text("Disk Usage:", fw=500), dmc.Badge(f"{disk.percent:.1f}%", color=disk_color, variant="light"), dmc.Text(f"{disk.used // (1024**3)} GB / {disk.total // (1024**3)} GB", size="xs", c="dimmed") ], justify="space-between"), dmc.Group([ dmc.Text("Network I/O:", fw=500), dmc.Text(f"↑ {network_sent} ↓ {network_recv}", size="xs", c="dimmed") ], justify="space-between") ], gap="sm") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Performance Metrics Unavailable", color="red", variant="light" ) def _get_collection_details_content() -> html.Div: """Get detailed collection information for modal.""" try: # Detailed service and collector information return dmc.Stack([ dmc.Title("📊 Data Collection Service Details", order=5), dmc.Text("Comprehensive data collection service information would be displayed here."), dmc.Divider(), dmc.Title("Configuration", order=6), dmc.Text("Service configuration details..."), dmc.Title("Performance Metrics", order=6), dmc.Text("Detailed performance analytics..."), dmc.Title("Health Status", order=6), dmc.Text("Individual collector health information...") ], gap="md") except Exception as e: return dmc.Alert( f"Error: {str(e)}", title="🔴 Error Loading Details", color="red", variant="light" ) def _get_collection_logs_content() -> str: """Get recent collection service logs.""" try: # This would read from actual log files # For now, return a placeholder current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") return f"""[{current_time}] INFO - Data Collection Service Logs Recent log entries would be displayed here from the data collection service. This would include: - Service startup/shutdown events - Collector connection status changes - Data collection statistics - Error messages and warnings - Performance metrics To view real logs, check the logs/ directory or configure log file monitoring. """ except Exception as e: return f"Error loading logs: {str(e)}" def _check_data_collection_service_running() -> bool: """Check if data collection service is running.""" try: # Check for running processes (simplified) for proc in psutil.process_iter(['pid', 'name', 'cmdline']): try: if proc.info['cmdline']: cmdline = ' '.join(proc.info['cmdline']) if 'start_data_collection.py' in cmdline or 'collection_service' in cmdline: return True except (psutil.NoSuchProcess, psutil.AccessDenied): continue return False except: return False