Implement Service Configuration Manager for data collection service
- Introduced `service_config.py` to manage configuration loading, validation, and schema management, enhancing modularity and security.
- Created a `ServiceConfig` class for handling configuration with robust error handling and default values.
- Refactored `DataCollectionService` to utilize the new `ServiceConfig`, streamlining configuration management and improving readability.
- Added a `CollectorFactory` to encapsulate collector creation logic, promoting separation of concerns.
- Updated `CollectorManager` and related components to align with the new architecture, ensuring better maintainability.
- Enhanced logging practices across the service for improved monitoring and debugging.

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity and performance.
This commit is contained in:
@@ -2,39 +2,27 @@
|
||||
"""
|
||||
Clean Production OKX Data Collector
|
||||
|
||||
This script runs OKX data collection with minimal console output
|
||||
and comprehensive file logging for production use.
|
||||
|
||||
Usage:
|
||||
python scripts/production_clean.py [--hours duration]
|
||||
|
||||
Examples:
|
||||
# Run for 8 hours
|
||||
python scripts/production_clean.py --hours 8
|
||||
|
||||
# Run overnight (12 hours)
|
||||
python scripts/production_clean.py --hours 12
|
||||
Simplified production script using the new DataCollectionService architecture.
|
||||
Provides clean console output with minimal logging for production environments.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
# Add project root to path
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
# Set environment variable to disable SQLAlchemy echo for clean production
|
||||
# Set environment for clean production logging
|
||||
import os
|
||||
os.environ['DEBUG'] = 'false'
|
||||
|
||||
# Suppress SQLAlchemy verbose logging globally for production
|
||||
# Suppress verbose SQLAlchemy logging
|
||||
import logging
|
||||
logging.getLogger('sqlalchemy').setLevel(logging.CRITICAL)
|
||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.CRITICAL)
|
||||
@@ -42,319 +30,88 @@ logging.getLogger('sqlalchemy.pool').setLevel(logging.CRITICAL)
|
||||
logging.getLogger('sqlalchemy.dialects').setLevel(logging.CRITICAL)
|
||||
logging.getLogger('sqlalchemy.orm').setLevel(logging.CRITICAL)
|
||||
|
||||
from data.exchanges.okx import OKXCollector
|
||||
from data.exchanges.okx.data_processor import OKXDataProcessor
|
||||
from data.collector_manager import CollectorManager
|
||||
from data.base_collector import DataType
|
||||
from data.common import CandleProcessingConfig
|
||||
from database.connection import init_database
|
||||
from utils.logger import get_logger
|
||||
from data.collection_service import run_data_collection_service
|
||||
|
||||
|
||||
class ProductionManager:
|
||||
"""Production manager for OKX data collection."""
|
||||
|
||||
def __init__(self, config_path: str = "config/okx_config.json"):
    """Set up the manager from the JSON config at *config_path*.

    Loads the configuration first (``_load_config`` terminates the process
    on failure), then builds the logger and the collector manager and
    resets the runtime bookkeeping.
    """
    self.config_path = config_path
    self.config = self._load_config()

    # Quiet logger: verbose output is switched off for production runs.
    quiet_logger = get_logger("production_manager", verbose=False)
    self.logger = quiet_logger

    # The collector manager shares the logger and only records errors.
    self.collector_manager = CollectorManager(
        logger=quiet_logger,
        log_errors_only=True,
    )
    self.collectors: List[OKXCollector] = []

    # Runtime bookkeeping, filled in once collection starts.
    self.running = False
    self.start_time = None
    self.statistics = dict(collectors_created=0, uptime_seconds=0)

    quiet_logger.info("🚀 Production Manager initialized with error-only logging")
    quiet_logger.info(f"📁 Config: {config_path}")
|
||||
|
||||
def _load_config(self) -> dict:
    """Load and return the JSON configuration stored at ``self.config_path``.

    Returns:
        The parsed JSON document (its structure is not validated here).

    Exits:
        Prints an error message and calls ``sys.exit(1)`` when the file
        cannot be read or does not contain valid JSON.
    """
    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(self.config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ Failed to load config from {self.config_path}: {e}")
        sys.exit(1)
|
||||
|
||||
async def create_collectors(self) -> bool:
|
||||
"""Create collectors for all enabled trading pairs."""
|
||||
try:
|
||||
enabled_pairs = [
|
||||
pair for pair in self.config['trading_pairs']
|
||||
if pair.get('enabled', True)
|
||||
]
|
||||
|
||||
self.logger.info(f"🎯 Creating collectors for {len(enabled_pairs)} trading pairs...")
|
||||
|
||||
for pair_config in enabled_pairs:
|
||||
symbol = pair_config['symbol']
|
||||
data_types = [DataType(dt) for dt in pair_config.get('data_types', ['trade'])]
|
||||
|
||||
# Get timeframes from config file for this trading pair
|
||||
config_timeframes = pair_config.get('timeframes', ['1m', '5m'])
|
||||
|
||||
self.logger.info(f"📈 Creating collector for {symbol} with timeframes: {config_timeframes}")
|
||||
|
||||
# Create custom candle processing config using timeframes from config
|
||||
candle_config = CandleProcessingConfig(
|
||||
timeframes=config_timeframes,
|
||||
emit_incomplete_candles=False, # Only complete candles
|
||||
auto_save_candles=True
|
||||
)
|
||||
|
||||
# Create custom data processor with error-only logging
|
||||
data_processor = OKXDataProcessor(
|
||||
symbol=symbol,
|
||||
config=candle_config,
|
||||
component_name=f"okx_processor_{symbol.replace('-', '_').lower()}",
|
||||
logger=self.logger
|
||||
)
|
||||
|
||||
# Create OKX collector with error-only logging
|
||||
collector = OKXCollector(
|
||||
symbol=symbol,
|
||||
data_types=data_types,
|
||||
component_name=f"okx_collector_{symbol.replace('-', '_').lower()}",
|
||||
auto_restart=False, # Disable auto-restart to prevent health check interference
|
||||
health_check_interval=self.config.get('data_collection', {}).get('health_check_interval', 120.0),
|
||||
store_raw_data=self.config.get('data_collection', {}).get('store_raw_data', True),
|
||||
logger=self.logger,
|
||||
log_errors_only=False # Enable full logging temporarily to debug WebSocket issues
|
||||
)
|
||||
|
||||
# Replace the default data processor with our custom one
|
||||
collector._data_processor = data_processor
|
||||
|
||||
# Add callbacks for processed data
|
||||
data_processor.add_trade_callback(collector._on_trade_processed)
|
||||
data_processor.add_candle_callback(collector._on_candle_processed)
|
||||
|
||||
# Add to manager
|
||||
self.collector_manager.add_collector(collector)
|
||||
self.collectors.append(collector)
|
||||
self.statistics['collectors_created'] += 1
|
||||
|
||||
self.logger.info(f"✅ Collector created for {symbol} with {'/'.join(config_timeframes)} timeframes")
|
||||
|
||||
self.logger.info(f"🎉 All {len(self.collectors)} collectors created successfully")
|
||||
# Get unique timeframes across all collectors for summary
|
||||
all_timeframes = set()
|
||||
for pair in enabled_pairs:
|
||||
async def get_config_timeframes(config_path: str) -> str:
|
||||
"""Get timeframes from configuration for display."""
|
||||
try:
|
||||
with open(config_path, 'r') as f:
|
||||
config = json.load(f)
|
||||
# Get unique timeframes from all enabled trading pairs
|
||||
all_timeframes = set()
|
||||
for pair in config.get('trading_pairs', []):
|
||||
if pair.get('enabled', True):
|
||||
all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
|
||||
self.logger.info(f"📊 Collectors configured with timeframes: {', '.join(sorted(all_timeframes))}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"❌ Failed to create collectors: {e}")
|
||||
return False
|
||||
|
||||
async def start(self) -> bool:
    """Initialise the database and start the collector manager.

    Returns:
        True when the collector manager reports a successful start;
        False when it reports failure or any exception is raised (the
        exception is logged, not propagated).
    """
    try:
        # Record the start time and mark the manager as running up front.
        self.start_time = time.time()
        self.running = True

        self.logger.info("🚀 Starting production data collection...")

        # The database must be initialised before the collectors start.
        self.logger.info("📊 Initializing database...")
        init_database()
        self.logger.info("✅ Database initialized successfully")

        # Guard clause: bail out if the collector manager fails to start.
        if not await self.collector_manager.start():
            self.logger.error("❌ Failed to start collector manager")
            return False

        self.logger.info("✅ All collectors started successfully")
        self.logger.info("📊 Data collection is now active with built-in processing")
        return True

    except Exception as exc:
        self.logger.error(f"❌ Failed to start collectors: {exc}")
        return False
|
||||
|
||||
async def stop(self) -> None:
    """Stop the collector manager, logging instead of raising on failure."""
    try:
        self.logger.info("🛑 Stopping production data collection...")
        self.running = False

        # Delegate the actual shutdown to the collector manager.
        await self.collector_manager.stop()

        self.logger.info("✅ All collectors stopped gracefully")
    except Exception as exc:
        # Shutdown errors are logged only, never propagated to the caller.
        self.logger.error(f"❌ Error during shutdown: {exc}")
|
||||
return ', '.join(sorted(all_timeframes))
|
||||
except:
|
||||
return "configured timeframes"
|
||||
|
||||
|
||||
async def run_clean_production(duration_hours: Optional[float] = None) -> bool:
    """Run the production collector with clean console output.

    Prints a short banner (duration, timeframes, storage and log
    locations) and then hands control to ``run_data_collection_service``.

    Args:
        duration_hours: How long to collect, in hours. A falsy value
            (``None`` or ``0``) is reported as an indefinite run.

    Returns:
        True when the collection service reports success; False when it
        reports failure or when any exception is raised (the exception is
        printed, not propagated).
    """
    # Configuration path - use the new service config format
    config_path = "config/data_collection.json"

    try:
        # Get timeframes for display
        timeframes_str = await get_config_timeframes(config_path)

        # Header
        print("OKX PRODUCTION DATA COLLECTOR")
        print("=" * 50)
        if duration_hours:
            print(f"Duration: {duration_hours} hours")
        else:
            print("Duration: Indefinite (until stopped)")
        print(f"Timeframes: {timeframes_str}")
        print("Database: Raw trades + aggregated candles")
        print("Logs: logs/ directory")
        print("=" * 50)

        # Start data collection using the new service
        print("Starting data collection service...")
        success = await run_data_collection_service(config_path, duration_hours)

        if success:
            print("Data collection completed successfully")
            return True

        print("Data collection failed")
        return False

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"Error: {e}")
        return False
||||
|
||||
def main():
    """Parse command-line arguments and run the production collector.

    Accepts an optional ``--hours`` float. A non-positive value is
    rejected with exit status 1. Otherwise the process exits with 0 when
    ``run_clean_production`` returns a truthy result or the user
    interrupts the run, and with 1 on failure or an unexpected exception.
    """
    # Local import keeps this entry point self-contained.
    import argparse

    parser = argparse.ArgumentParser(description="Clean Production OKX Data Collector")
    parser.add_argument(
        "--hours",
        type=float,
        default=None,
        help="Collection duration in hours (default: indefinite until stopped manually)",
    )

    args = parser.parse_args()

    # Validate arguments
    if args.hours is not None and args.hours <= 0:
        print("Duration must be positive")
        sys.exit(1)

    try:
        success = asyncio.run(run_clean_production(args.hours))
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        # Ctrl+C is a normal way to stop an indefinite run.
        print("\nInterrupted by user")
        sys.exit(0)
    except Exception as e:
        print(f"Fatal error: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user