#!/usr/bin/env python3
"""
Data Collection Service

Production-ready service for cryptocurrency market data collection
with clean logging and robust error handling.
"""

import asyncio
import signal
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any
import logging

# Add project root to path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Set environment for clean production logging
import os
os.environ['DEBUG'] = 'false'

# Suppress verbose SQLAlchemy logging
logging.getLogger('sqlalchemy').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.pool').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.dialects').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.orm').setLevel(logging.WARNING)

from data.collector_manager import CollectorManager
from config.service_config import ServiceConfig
from data.collector_factory import CollectorFactory
from database.connection import init_database
from utils.logger import get_logger


class DataCollectionService:
    """Production data collection service with modular architecture."""

    def __init__(self, config_path: str = "config/data_collection.json"):
        """Initialize the data collection service."""
        self.config_path = config_path
        self.logger = get_logger("data_collection_service", log_level="INFO", verbose=False)

        # Initialize configuration and factory
        self.service_config = ServiceConfig(config_path, logger=self.logger)
        self.config = self.service_config.load_config()
        self.collector_factory = CollectorFactory(logger=self.logger)

        # Core components
        self.collector_manager = CollectorManager(logger=self.logger, log_errors_only=True)
        self.collectors: List = []

        # Service state
        self.running = False
        self.start_time = None
        self.shutdown_event = asyncio.Event()

        # Statistics for monitoring
        self.stats = {
            'collectors_created': 0,
            'collectors_running': 0,
            'total_uptime_seconds': 0,
            'last_activity': None,
            'errors_count': 0
        }

        self.logger.info("🚀 Data Collection Service initialized")
        self.logger.info(f"📁 Configuration: {config_path}")

    async def initialize_collectors(self) -> bool:
        """Initialize all data collectors based on configuration."""
        try:
            collectors = await self.collector_factory.create_collectors_from_config(self.config)

            if not collectors:
                self.logger.error("❌ No collectors were successfully created")
                return False

            for collector in collectors:
                self.collector_manager.add_collector(collector)
                self.collectors.append(collector)

            self.stats['collectors_created'] = len(collectors)
            self.logger.info(f"✅ Successfully initialized {len(collectors)} data collectors")
            return True

        except Exception as e:
            self.logger.error(f"❌ Failed to initialize collectors: {e}", exc_info=True)
            self.stats['errors_count'] += 1
            return False

    async def start(self) -> bool:
        """Start the data collection service."""
        try:
            self.start_time = time.time()
            self.running = True

            self.logger.info("🚀 Starting Data Collection Service...")
            self.logger.info("📊 Initializing database connection...")
            init_database()
            self.logger.info("✅ Database connection established")

            # Start collector manager
            self.logger.info("🔌 Starting data collectors...")
            success = await self.collector_manager.start()

            if success:
                self.stats['collectors_running'] = len(self.collectors)
                self.stats['last_activity'] = datetime.now()
                self.logger.info("✅ Data Collection Service started successfully")
                self.logger.info(f"📈 Active collectors: {self.stats['collectors_running']}")
                return True
            else:
                self.logger.error("❌ Failed to start data collectors")
                self.stats['errors_count'] += 1
                return False

        except Exception as e:
            self.logger.error(f"❌ Failed to start service: {e}", exc_info=True)
            self.stats['errors_count'] += 1
            return False

    async def stop(self) -> None:
        """Stop the data collection service gracefully."""
        try:
            self.logger.info("🛑 Stopping Data Collection Service...")
            self.running = False

            # Stop all collectors
            await self.collector_manager.stop()

            # Update statistics
            if self.start_time:
                self.stats['total_uptime_seconds'] = time.time() - self.start_time
            self.stats['collectors_running'] = 0

            self.logger.info("✅ Data Collection Service stopped gracefully")
            self.logger.info(f"📊 Total uptime: {self.stats['total_uptime_seconds']:.1f} seconds")

        except Exception as e:
            self.logger.error(f"❌ Error during service shutdown: {e}", exc_info=True)
            self.stats['errors_count'] += 1

    def get_status(self) -> Dict[str, Any]:
        """Get current service status."""
        current_time = time.time()
        uptime = current_time - self.start_time if self.start_time else 0

        return {
            'running': self.running,
            'uptime_seconds': uptime,
            'uptime_hours': uptime / 3600,
            'collectors_total': len(self.collectors),
            'collectors_running': self.stats['collectors_running'],
            'errors_count': self.stats['errors_count'],
            'last_activity': self.stats['last_activity'],
            'start_time': datetime.fromtimestamp(self.start_time) if self.start_time else None
        }

    def setup_signal_handlers(self) -> None:
        """Set up signal handlers for graceful shutdown."""
        def signal_handler(signum, frame):
            self.logger.info(f"📡 Received shutdown signal ({signum}), stopping gracefully...")
            self.shutdown_event.set()

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

    async def run(self, duration_hours: Optional[float] = None) -> bool:
        """
        Run the data collection service.

        Args:
            duration_hours: Optional duration to run (None = indefinite)

        Returns:
            bool: True if successful, False if an error occurred
        """
        self.setup_signal_handlers()

        try:
            # Initialize collectors
            if not await self.initialize_collectors():
                return False

            # Start service
            if not await self.start():
                return False

            # Service running notification
            status = self.get_status()
            if duration_hours:
                self.logger.info(f"⏱️ Service will run for {duration_hours} hours")
            else:
                self.logger.info("⏱️ Service running indefinitely (until stopped)")
            self.logger.info(f"📊 Active collectors: {status['collectors_running']}")
            self.logger.info("🔍 Monitor with: python scripts/monitor_clean.py")

            # Main service loop
            update_interval = 600  # Status update every 10 minutes
            last_update = time.time()

            while not self.shutdown_event.is_set():
                # Wait for shutdown signal or timeout
                try:
                    await asyncio.wait_for(self.shutdown_event.wait(), timeout=1.0)
                    break
                except asyncio.TimeoutError:
                    pass

                current_time = time.time()

                # Check duration limit
                if duration_hours and self.start_time:
                    elapsed_hours = (current_time - self.start_time) / 3600
                    if elapsed_hours >= duration_hours:
                        self.logger.info(f"⏰ Completed {duration_hours} hour run")
                        break

                # Periodic status update
                if current_time - last_update >= update_interval:
                    if self.start_time:
                        elapsed_hours = (current_time - self.start_time) / 3600
                        self.logger.info(f"⏱️ Service uptime: {elapsed_hours:.1f} hours")
                    last_update = current_time

            return True

        except Exception as e:
            self.logger.error(f"❌ Service error: {e}", exc_info=True)
            self.stats['errors_count'] += 1
            return False

        finally:
            await self.stop()


# Service entry point function
async def run_data_collection_service(
    config_path: str = "config/data_collection.json",
    duration_hours: Optional[float] = None
) -> bool:
    """Run the data collection service."""
    service = DataCollectionService(config_path)
    return await service.run(duration_hours)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Data Collection Service")
    parser.add_argument("--config", default="config/data_collection.json",
                        help="Configuration file path")
    parser.add_argument("--duration", type=float,
                        help="Duration to run in hours (default: indefinite)")

    args = parser.parse_args()

    # Run service
    asyncio.run(run_data_collection_service(args.config, args.duration))
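
# Example invocations, assuming this script lives one level below the project root
# (the scripts/ path is illustrative; --config and --duration are the flags defined above):
#   python scripts/data_collection_service.py --duration 24
#   python scripts/data_collection_service.py --config config/data_collection.json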