TCPDashboard/data/collection_service.py
Vasily.onl 2890ba2efa Implement Service Configuration Manager for data collection service
- Introduced `service_config.py` to manage configuration loading, validation, and schema management, enhancing modularity and security.
- Created a `ServiceConfig` class for handling configuration with robust error handling and default values.
- Refactored `DataCollectionService` to utilize the new `ServiceConfig`, streamlining configuration management and improving readability.
- Added a `CollectorFactory` to encapsulate collector creation logic, promoting separation of concerns.
- Updated `CollectorManager` and related components to align with the new architecture, ensuring better maintainability.
- Enhanced logging practices across the service for improved monitoring and debugging.

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity and performance.
2025-06-10 12:55:27 +08:00
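
The service code below consumes the new components through a narrow surface: `ServiceConfig(config_path, logger=...)` plus `load_config()`, and `CollectorFactory(logger=...)` plus an awaitable `create_collectors_from_config(config)`. The sketch below is a hypothetical reconstruction of those interfaces, written only to make the call sites easier to follow; the actual implementations in `config/service_config.py` and `data/collector_factory.py` are not shown in this commit and may differ (the `"collectors"` config key in particular is an assumption).

```python
# Hypothetical sketch only: the real implementations live in
# config/service_config.py and data/collector_factory.py, which are not shown.
import json
import logging
from typing import Any, Dict, List, Optional


class ServiceConfig:
    """Loads the data-collection configuration, falling back to defaults."""

    def __init__(self, config_path: str, logger: Optional[logging.Logger] = None):
        self.config_path = config_path
        self.logger = logger or logging.getLogger(__name__)

    def load_config(self) -> Dict[str, Any]:
        # Fall back to an empty config if the file is missing or malformed.
        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError) as exc:
            self.logger.warning("Config load failed (%s); using defaults", exc)
            return {}


class CollectorFactory:
    """Builds collector instances from a loaded configuration dict."""

    def __init__(self, logger: Optional[logging.Logger] = None):
        self.logger = logger or logging.getLogger(__name__)

    async def create_collectors_from_config(self, config: Dict[str, Any]) -> List[Any]:
        # A real factory would map each config entry to a concrete collector class;
        # the "collectors" key is an assumed schema, not confirmed by this commit.
        collectors: List[Any] = []
        for entry in config.get("collectors", []):
            self.logger.info("Would create collector for entry: %s", entry)
        return collectors
```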

264 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Data Collection Service
Production-ready service for cryptocurrency market data collection
with clean logging and robust error handling.
"""
import asyncio
import signal
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any
import logging

# Add project root to path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Set environment for clean production logging
import os
os.environ['DEBUG'] = 'false'

# Suppress verbose SQLAlchemy logging
logging.getLogger('sqlalchemy').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.pool').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.dialects').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.orm').setLevel(logging.WARNING)

from data.collector_manager import CollectorManager
from config.service_config import ServiceConfig
from data.collector_factory import CollectorFactory
from database.connection import init_database
from utils.logger import get_logger


class DataCollectionService:
"""Production data collection service with modular architecture."""
def __init__(self, config_path: str = "config/data_collection.json"):
"""Initialize the data collection service."""
self.config_path = config_path
self.logger = get_logger("data_collection_service", log_level="INFO", verbose=False)
# Initialize configuration and factory
self.service_config = ServiceConfig(config_path, logger=self.logger)
self.config = self.service_config.load_config()
self.collector_factory = CollectorFactory(logger=self.logger)
# Core components
self.collector_manager = CollectorManager(logger=self.logger, log_errors_only=True)
self.collectors: List = []
# Service state
self.running = False
self.start_time = None
self.shutdown_event = asyncio.Event()
# Statistics for monitoring
self.stats = {
'collectors_created': 0,
'collectors_running': 0,
'total_uptime_seconds': 0,
'last_activity': None,
'errors_count': 0
}
self.logger.info("🚀 Data Collection Service initialized")
self.logger.info(f"📁 Configuration: {config_path}")

    async def initialize_collectors(self) -> bool:
        """Initialize all data collectors based on configuration."""
        try:
            collectors = await self.collector_factory.create_collectors_from_config(self.config)

            if not collectors:
                self.logger.error("❌ No collectors were successfully created")
                return False

            for collector in collectors:
                self.collector_manager.add_collector(collector)
                self.collectors.append(collector)

            self.stats['collectors_created'] = len(collectors)
            self.logger.info(f"✅ Successfully initialized {len(collectors)} data collectors")
            return True

        except Exception as e:
            self.logger.error(f"❌ Failed to initialize collectors: {e}", exc_info=True)
            self.stats['errors_count'] += 1
            return False

    async def start(self) -> bool:
        """Start the data collection service."""
        try:
            self.start_time = time.time()
            self.running = True

            self.logger.info("🚀 Starting Data Collection Service...")
            self.logger.info("📊 Initializing database connection...")
            init_database()
            self.logger.info("✅ Database connection established")

            # Start collector manager
            self.logger.info("🔌 Starting data collectors...")
            success = await self.collector_manager.start()

            if success:
                self.stats['collectors_running'] = len(self.collectors)
                self.stats['last_activity'] = datetime.now()
                self.logger.info("✅ Data Collection Service started successfully")
                self.logger.info(f"📈 Active collectors: {self.stats['collectors_running']}")
                return True
            else:
                self.logger.error("❌ Failed to start data collectors")
                self.stats['errors_count'] += 1
                return False

        except Exception as e:
            self.logger.error(f"❌ Failed to start service: {e}", exc_info=True)
            self.stats['errors_count'] += 1
            return False

    async def stop(self) -> None:
        """Stop the data collection service gracefully."""
        try:
            self.logger.info("🛑 Stopping Data Collection Service...")
            self.running = False

            # Stop all collectors
            await self.collector_manager.stop()

            # Update statistics
            if self.start_time:
                self.stats['total_uptime_seconds'] = time.time() - self.start_time
            self.stats['collectors_running'] = 0

            self.logger.info("✅ Data Collection Service stopped gracefully")
            self.logger.info(f"📊 Total uptime: {self.stats['total_uptime_seconds']:.1f} seconds")

        except Exception as e:
            self.logger.error(f"❌ Error during service shutdown: {e}", exc_info=True)
            self.stats['errors_count'] += 1

    def get_status(self) -> Dict[str, Any]:
        """Get current service status."""
        current_time = time.time()
        uptime = current_time - self.start_time if self.start_time else 0

        return {
            'running': self.running,
            'uptime_seconds': uptime,
            'uptime_hours': uptime / 3600,
            'collectors_total': len(self.collectors),
            'collectors_running': self.stats['collectors_running'],
            'errors_count': self.stats['errors_count'],
            'last_activity': self.stats['last_activity'],
            'start_time': datetime.fromtimestamp(self.start_time) if self.start_time else None
        }

    def setup_signal_handlers(self) -> None:
        """Set up signal handlers for graceful shutdown."""
        def signal_handler(signum, frame):
            self.logger.info(f"📡 Received shutdown signal ({signum}), stopping gracefully...")
            self.shutdown_event.set()

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

    async def run(self, duration_hours: Optional[float] = None) -> bool:
        """
        Run the data collection service.

        Args:
            duration_hours: Optional duration to run (None = indefinite)

        Returns:
            bool: True if successful, False if error occurred
        """
        self.setup_signal_handlers()

        try:
            # Initialize collectors
            if not await self.initialize_collectors():
                return False

            # Start service
            if not await self.start():
                return False

            # Service running notification
            status = self.get_status()
            if duration_hours:
                self.logger.info(f"⏱️ Service will run for {duration_hours} hours")
            else:
                self.logger.info("⏱️ Service running indefinitely (until stopped)")
            self.logger.info(f"📊 Active collectors: {status['collectors_running']}")
            self.logger.info("🔍 Monitor with: python scripts/monitor_clean.py")

            # Main service loop
            update_interval = 600  # Status update every 10 minutes
            last_update = time.time()

            while not self.shutdown_event.is_set():
                # Wait for shutdown signal or timeout
                try:
                    await asyncio.wait_for(self.shutdown_event.wait(), timeout=1.0)
                    break
                except asyncio.TimeoutError:
                    pass

                current_time = time.time()

                # Check duration limit
                if duration_hours and self.start_time:
                    elapsed_hours = (current_time - self.start_time) / 3600
                    if elapsed_hours >= duration_hours:
                        self.logger.info(f"⏰ Completed {duration_hours} hour run")
                        break

                # Periodic status update
                if current_time - last_update >= update_interval:
                    if self.start_time:
                        elapsed_hours = (current_time - self.start_time) / 3600
                        self.logger.info(f"⏱️ Service uptime: {elapsed_hours:.1f} hours")
                    last_update = current_time

            return True

        except Exception as e:
            self.logger.error(f"❌ Service error: {e}", exc_info=True)
            self.stats['errors_count'] += 1
            return False

        finally:
            await self.stop()


# Service entry point function
async def run_data_collection_service(
    config_path: str = "config/data_collection.json",
    duration_hours: Optional[float] = None
) -> bool:
    """Run the data collection service."""
    service = DataCollectionService(config_path)
    return await service.run(duration_hours)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Data Collection Service")
    parser.add_argument("--config", default="config/data_collection.json", help="Configuration file path")
    parser.add_argument("--duration", type=float, help="Duration to run in hours (default: indefinite)")
    args = parser.parse_args()

    # Run service
    asyncio.run(run_data_collection_service(args.config, args.duration))
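
# Example invocations (illustrative; assumes the working directory is the
# TCPDashboard project root so the default relative config path resolves):
#   python data/collection_service.py                       # run until SIGINT/SIGTERM
#   python data/collection_service.py --duration 4          # stop after roughly 4 hours
#   python data/collection_service.py --config config/data_collection.json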