#!/usr/bin/env python3
"""
Data Collection Service

Production-ready service for cryptocurrency market data collection
with clean logging and robust error handling.
"""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import signal
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import List, Optional, Dict, Any
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
# Make the project root importable when this file is run as a script.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Force production behaviour: no debug output from project components.
import os
os.environ['DEBUG'] = 'false'

# SQLAlchemy is very chatty at INFO level; raise all of its loggers to
# WARNING so production logs stay readable.
for _noisy_logger in (
    'sqlalchemy',
    'sqlalchemy.engine',
    'sqlalchemy.pool',
    'sqlalchemy.dialects',
    'sqlalchemy.orm',
):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
|
|
|
|
|
|
|
|
|
|
from data.collector_manager import CollectorManager
|
2025-06-10 12:55:27 +08:00
|
|
|
from config.service_config import ServiceConfig
|
|
|
|
|
from data.collector_factory import CollectorFactory
|
2025-06-02 14:23:08 +08:00
|
|
|
from database.connection import init_database
|
|
|
|
|
from utils.logger import get_logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DataCollectionService:
    """Production data collection service with modular architecture."""

    def __init__(self, config_path: str = "config/data_collection.json"):
        """Initialize the data collection service.

        Loads the JSON configuration and builds the collector factory and
        manager. No collectors are created here — that happens later in
        initialize_collectors().

        Args:
            config_path: Path to the JSON data-collection configuration file.
        """
        self.config_path = config_path
        # verbose=False keeps production logs quiet; errors still surface.
        self.logger = get_logger("data_collection_service", log_level="INFO", verbose=False)

        # Initialize configuration and factory
        self.service_config = ServiceConfig(config_path, logger=self.logger)
        self.config = self.service_config.load_config()
        self.collector_factory = CollectorFactory(logger=self.logger)

        # Core components
        self.collector_manager = CollectorManager(logger=self.logger, log_errors_only=True)
        self.collectors: List = []

        # Service state
        self.running = False
        self.start_time: Optional[float] = None  # epoch seconds; set in start()
        self.shutdown_event = asyncio.Event()

        # Statistics for monitoring; surfaced through get_status().
        self.stats: Dict[str, Any] = {
            'collectors_created': 0,
            'collectors_running': 0,
            'total_uptime_seconds': 0,
            'last_activity': None,
            'errors_count': 0
        }

        self.logger.info("🚀 Data Collection Service initialized")
        self.logger.info(f"📁 Configuration: {config_path}")
|
|
|
|
|
|
|
|
|
|
async def initialize_collectors(self) -> bool:
|
|
|
|
|
"""Initialize all data collectors based on configuration."""
|
|
|
|
|
try:
|
2025-06-10 12:55:27 +08:00
|
|
|
collectors = await self.collector_factory.create_collectors_from_config(self.config)
|
2025-06-02 14:23:08 +08:00
|
|
|
|
2025-06-10 12:55:27 +08:00
|
|
|
if not collectors:
|
|
|
|
|
self.logger.error("❌ No collectors were successfully created")
|
2025-06-02 14:23:08 +08:00
|
|
|
return False
|
|
|
|
|
|
2025-06-10 12:55:27 +08:00
|
|
|
for collector in collectors:
|
2025-06-02 14:23:08 +08:00
|
|
|
self.collector_manager.add_collector(collector)
|
|
|
|
|
self.collectors.append(collector)
|
2025-06-10 12:55:27 +08:00
|
|
|
|
|
|
|
|
self.stats['collectors_created'] = len(collectors)
|
|
|
|
|
self.logger.info(f"✅ Successfully initialized {len(collectors)} data collectors")
|
|
|
|
|
return True
|
2025-06-02 14:23:08 +08:00
|
|
|
|
|
|
|
|
except Exception as e:
|
2025-06-10 12:55:27 +08:00
|
|
|
self.logger.error(f"❌ Failed to initialize collectors: {e}", exc_info=True)
|
|
|
|
|
self.stats['errors_count'] += 1
|
2025-06-02 14:23:08 +08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
async def start(self) -> bool:
|
|
|
|
|
"""Start the data collection service."""
|
|
|
|
|
try:
|
|
|
|
|
self.start_time = time.time()
|
|
|
|
|
self.running = True
|
|
|
|
|
|
|
|
|
|
self.logger.info("🚀 Starting Data Collection Service...")
|
|
|
|
|
|
|
|
|
|
self.logger.info("📊 Initializing database connection...")
|
|
|
|
|
init_database()
|
|
|
|
|
self.logger.info("✅ Database connection established")
|
|
|
|
|
|
|
|
|
|
# Start collector manager
|
|
|
|
|
self.logger.info("🔌 Starting data collectors...")
|
|
|
|
|
success = await self.collector_manager.start()
|
|
|
|
|
|
|
|
|
|
if success:
|
|
|
|
|
self.stats['collectors_running'] = len(self.collectors)
|
|
|
|
|
self.stats['last_activity'] = datetime.now()
|
|
|
|
|
|
|
|
|
|
self.logger.info("✅ Data Collection Service started successfully")
|
|
|
|
|
self.logger.info(f"📈 Active collectors: {self.stats['collectors_running']}")
|
|
|
|
|
return True
|
|
|
|
|
else:
|
|
|
|
|
self.logger.error("❌ Failed to start data collectors")
|
|
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
2025-06-10 12:55:27 +08:00
|
|
|
self.logger.error(f"❌ Failed to start service: {e}", exc_info=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
async def stop(self) -> None:
|
|
|
|
|
"""Stop the data collection service gracefully."""
|
|
|
|
|
try:
|
|
|
|
|
self.logger.info("🛑 Stopping Data Collection Service...")
|
|
|
|
|
self.running = False
|
|
|
|
|
|
|
|
|
|
# Stop all collectors
|
|
|
|
|
await self.collector_manager.stop()
|
|
|
|
|
|
|
|
|
|
# Update statistics
|
|
|
|
|
if self.start_time:
|
|
|
|
|
self.stats['total_uptime_seconds'] = time.time() - self.start_time
|
|
|
|
|
|
|
|
|
|
self.stats['collectors_running'] = 0
|
|
|
|
|
|
|
|
|
|
self.logger.info("✅ Data Collection Service stopped gracefully")
|
|
|
|
|
self.logger.info(f"📊 Total uptime: {self.stats['total_uptime_seconds']:.1f} seconds")
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
2025-06-10 12:55:27 +08:00
|
|
|
self.logger.error(f"❌ Error during service shutdown: {e}", exc_info=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
|
|
|
|
|
def get_status(self) -> Dict[str, Any]:
|
|
|
|
|
"""Get current service status."""
|
|
|
|
|
current_time = time.time()
|
|
|
|
|
uptime = current_time - self.start_time if self.start_time else 0
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
'running': self.running,
|
|
|
|
|
'uptime_seconds': uptime,
|
|
|
|
|
'uptime_hours': uptime / 3600,
|
|
|
|
|
'collectors_total': len(self.collectors),
|
|
|
|
|
'collectors_running': self.stats['collectors_running'],
|
|
|
|
|
'errors_count': self.stats['errors_count'],
|
|
|
|
|
'last_activity': self.stats['last_activity'],
|
|
|
|
|
'start_time': datetime.fromtimestamp(self.start_time) if self.start_time else None
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def setup_signal_handlers(self) -> None:
|
|
|
|
|
"""Setup signal handlers for graceful shutdown."""
|
|
|
|
|
def signal_handler(signum, frame):
|
|
|
|
|
self.logger.info(f"📡 Received shutdown signal ({signum}), stopping gracefully...")
|
|
|
|
|
self.shutdown_event.set()
|
|
|
|
|
|
|
|
|
|
signal.signal(signal.SIGINT, signal_handler)
|
|
|
|
|
signal.signal(signal.SIGTERM, signal_handler)
|
|
|
|
|
|
|
|
|
|
    async def run(self, duration_hours: Optional[float] = None) -> bool:
        """
        Run the data collection service.

        Full lifecycle: install signal handlers, build collectors, start the
        service, then poll once per second until a shutdown signal arrives or
        the optional duration elapses. stop() always runs on exit (finally).

        Args:
            duration_hours: Optional duration to run (None = indefinite)

        Returns:
            bool: True if successful, False if error occurred
        """
        self.setup_signal_handlers()

        try:
            # Initialize collectors
            if not await self.initialize_collectors():
                return False

            # Start service
            if not await self.start():
                return False

            # Service running notification
            status = self.get_status()
            if duration_hours:
                self.logger.info(f"⏱️ Service will run for {duration_hours} hours")
            else:
                self.logger.info("⏱️ Service running indefinitely (until stopped)")

            self.logger.info(f"📊 Active collectors: {status['collectors_running']}")
            self.logger.info("🔍 Monitor with: python scripts/monitor_clean.py")

            # Main service loop
            update_interval = 600  # Status update every 10 minutes
            last_update = time.time()

            while not self.shutdown_event.is_set():
                # Wait for shutdown signal or timeout. The 1-second timeout
                # keeps the duration/status checks below responsive without
                # busy-waiting.
                try:
                    await asyncio.wait_for(self.shutdown_event.wait(), timeout=1.0)
                    break
                except asyncio.TimeoutError:
                    pass

                current_time = time.time()

                # Check duration limit
                if duration_hours and self.start_time:
                    elapsed_hours = (current_time - self.start_time) / 3600
                    if elapsed_hours >= duration_hours:
                        self.logger.info(f"⏰ Completed {duration_hours} hour run")
                        break

                # Periodic status update
                if current_time - last_update >= update_interval:
                    if self.start_time:
                        elapsed_hours = (current_time - self.start_time) / 3600
                        self.logger.info(f"⏱️ Service uptime: {elapsed_hours:.1f} hours")
                    last_update = current_time

            return True

        except Exception as e:
            self.logger.error(f"❌ Service error: {e}", exc_info=True)
            self.stats['errors_count'] += 1
            return False

        finally:
            # Guarantee a graceful shutdown on every exit path.
            await self.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Service entry point function
|
|
|
|
|
async def run_data_collection_service(
    config_path: str = "config/data_collection.json",
    duration_hours: Optional[float] = None
) -> bool:
    """Convenience entry point: build the service and run it to completion.

    Args:
        config_path: Path to the JSON data-collection configuration file.
        duration_hours: How long to run, in hours (None = indefinite).

    Returns:
        bool: True if the service ran and shut down cleanly.
    """
    return await DataCollectionService(config_path).run(duration_hours)
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import argparse

    # Command-line interface for running the service directly.
    cli = argparse.ArgumentParser(description="Data Collection Service")
    cli.add_argument("--config", default="config/data_collection.json", help="Configuration file path")
    cli.add_argument("--duration", type=float, help="Duration to run in hours (default: indefinite)")
    ns = cli.parse_args()

    # Block until the service finishes or is interrupted.
    asyncio.run(run_data_collection_service(ns.config, ns.duration))
|