#!/usr/bin/env python3
"""
Clean Production OKX Data Collector

This script runs OKX data collection with minimal console output and
comprehensive file logging for production use.

Usage:
    python scripts/production_clean.py [--hours duration]

Examples:
    # Run for 8 hours
    python scripts/production_clean.py --hours 8

    # Run overnight (12 hours)
    python scripts/production_clean.py --hours 12
"""

import asyncio
import argparse
import signal
import sys
import time
import json
from datetime import datetime
from pathlib import Path
from typing import List, Optional

# Add project root to path so project packages import when run as a script
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Set environment variable to disable SQLAlchemy echo for clean production
import os
os.environ['DEBUG'] = 'false'

# Suppress SQLAlchemy verbose logging globally for production
import logging
for _sa_logger_name in (
    'sqlalchemy',
    'sqlalchemy.engine',
    'sqlalchemy.pool',
    'sqlalchemy.dialects',
    'sqlalchemy.orm',
):
    logging.getLogger(_sa_logger_name).setLevel(logging.CRITICAL)

from data.exchanges.okx import OKXCollector
from data.exchanges.okx.data_processor import OKXDataProcessor
from data.collector_manager import CollectorManager
from data.base_collector import DataType
from data.common import CandleProcessingConfig
from database.connection import init_database
from utils.logger import get_logger


class ProductionManager:
    """Production manager for OKX data collection.

    Owns the collector lifecycle: loads the JSON config, builds one
    OKXCollector (with a custom data processor) per enabled trading pair,
    and starts/stops them through a CollectorManager.
    """

    def __init__(self, config_path: str = "config/okx_config.json"):
        """Load config and set up logging/manager state.

        Args:
            config_path: Path to the JSON configuration file.
        """
        self.config_path = config_path
        self.config = self._load_config()

        # Configure clean logging - minimal console output, error-only file logs
        self.logger = get_logger("production_manager", verbose=False)

        # Core components with error-only logging
        self.collector_manager = CollectorManager(logger=self.logger, log_errors_only=True)
        self.collectors: List[OKXCollector] = []

        # Runtime state
        self.running = False
        self.start_time: Optional[float] = None
        self.statistics = {
            'collectors_created': 0,
            'uptime_seconds': 0,
        }

        self.logger.info("šŸš€ Production Manager initialized with error-only logging")
        self.logger.info(f"šŸ“ Config: {config_path}")

    def _load_config(self) -> dict:
        """Load configuration from JSON file.

        Exits the process with status 1 if the file is missing or invalid,
        since nothing can run without a config.
        """
        try:
            with open(self.config_path, 'r') as f:
                config = json.load(f)
            return config
        except Exception as e:
            print(f"āŒ Failed to load config from {self.config_path}: {e}")
            sys.exit(1)

    async def create_collectors(self) -> bool:
        """Create collectors for all enabled trading pairs.

        Returns:
            True when every collector was created, False on any error.
        """
        try:
            enabled_pairs = [
                pair for pair in self.config['trading_pairs']
                if pair.get('enabled', True)
            ]

            self.logger.info(f"šŸŽÆ Creating collectors for {len(enabled_pairs)} trading pairs...")

            for pair_config in enabled_pairs:
                symbol = pair_config['symbol']
                data_types = [DataType(dt) for dt in pair_config.get('data_types', ['trade'])]

                # Get timeframes from config file for this trading pair
                config_timeframes = pair_config.get('timeframes', ['1m', '5m'])

                self.logger.info(
                    f"šŸ“ˆ Creating collector for {symbol} with timeframes: {config_timeframes}"
                )

                # Create custom candle processing config using timeframes from config
                candle_config = CandleProcessingConfig(
                    timeframes=config_timeframes,
                    emit_incomplete_candles=False,  # Only complete candles
                    auto_save_candles=True,
                )

                # Create custom data processor with error-only logging
                data_processor = OKXDataProcessor(
                    symbol=symbol,
                    config=candle_config,
                    component_name=f"okx_processor_{symbol.replace('-', '_').lower()}",
                    logger=self.logger,
                )

                # Create OKX collector with error-only logging
                collector = OKXCollector(
                    symbol=symbol,
                    data_types=data_types,
                    component_name=f"okx_collector_{symbol.replace('-', '_').lower()}",
                    auto_restart=False,  # Disable auto-restart to prevent health check interference
                    health_check_interval=self.config.get('data_collection', {}).get(
                        'health_check_interval', 120.0
                    ),
                    store_raw_data=self.config.get('data_collection', {}).get(
                        'store_raw_data', True
                    ),
                    logger=self.logger,
                    log_errors_only=False,  # Enable full logging temporarily to debug WebSocket issues
                )

                # Replace the default data processor with our custom one
                collector._data_processor = data_processor

                # Add callbacks for processed data
                data_processor.add_trade_callback(collector._on_trade_processed)
                data_processor.add_candle_callback(collector._on_candle_processed)

                # Add to manager
                self.collector_manager.add_collector(collector)
                self.collectors.append(collector)
                self.statistics['collectors_created'] += 1

                self.logger.info(
                    f"āœ… Collector created for {symbol} with "
                    f"{'/'.join(config_timeframes)} timeframes"
                )

            self.logger.info(f"šŸŽ‰ All {len(self.collectors)} collectors created successfully")

            # Get unique timeframes across all collectors for summary
            all_timeframes = set()
            for pair in enabled_pairs:
                all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
            self.logger.info(
                f"šŸ“Š Collectors configured with timeframes: {', '.join(sorted(all_timeframes))}"
            )

            return True

        except Exception as e:
            self.logger.error(f"āŒ Failed to create collectors: {e}")
            return False

    async def start(self) -> bool:
        """Start all collectors and begin data collection.

        Initializes the database first, then delegates to the
        CollectorManager. Returns True on success.
        """
        try:
            self.start_time = time.time()
            self.running = True

            self.logger.info("šŸš€ Starting production data collection...")

            # Initialize global database managers
            self.logger.info("šŸ“Š Initializing database...")
            init_database()
            self.logger.info("āœ… Database initialized successfully")

            # Start collector manager
            success = await self.collector_manager.start()
            if not success:
                self.logger.error("āŒ Failed to start collector manager")
                return False

            self.logger.info("āœ… All collectors started successfully")
            self.logger.info("šŸ“Š Data collection is now active with built-in processing")

            return True

        except Exception as e:
            self.logger.error(f"āŒ Failed to start collectors: {e}")
            return False

    async def stop(self) -> None:
        """Stop all collectors gracefully."""
        try:
            self.logger.info("šŸ›‘ Stopping production data collection...")
            self.running = False

            # Record total uptime before shutdown (was never updated before)
            if self.start_time is not None:
                self.statistics['uptime_seconds'] = int(time.time() - self.start_time)

            # Stop collector manager
            await self.collector_manager.stop()

            self.logger.info("āœ… All collectors stopped gracefully")

        except Exception as e:
            self.logger.error(f"āŒ Error during shutdown: {e}")


async def run_clean_production(duration_hours: Optional[float] = None):
    """Run production collector with clean output.

    Args:
        duration_hours: Optional run duration; None means run until a
            shutdown signal (SIGINT/SIGTERM) is received.

    Returns:
        True on a clean run, False on any failure.
    """
    # Global state for signal handling
    shutdown_event = asyncio.Event()
    manager = None

    def signal_handler(signum, frame):
        print("\nšŸ“” Shutdown signal received, stopping gracefully...")
        shutdown_event.set()

    # Set up signal handlers
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        # Read config to show actual timeframes in banner
        config_path = "config/okx_config.json"
        try:
            with open(config_path, 'r') as f:
                config = json.load(f)
            # Get unique timeframes from all enabled trading pairs
            all_timeframes = set()
            for pair in config.get('trading_pairs', []):
                if pair.get('enabled', True):
                    all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
            timeframes_str = ', '.join(sorted(all_timeframes))
        except Exception:
            # Banner is cosmetic; fall back to a generic label on any read error
            timeframes_str = "configured timeframes"

        # Header
        print("šŸš€ OKX PRODUCTION DATA COLLECTOR")
        print("=" * 50)
        if duration_hours:
            print(f"ā±ļø  Duration: {duration_hours} hours")
        else:
            print("ā±ļø  Duration: Indefinite (until stopped)")
        print(f"šŸ“Š Timeframes: {timeframes_str}")
        print("šŸ’¾ Database: Raw trades + aggregated candles")
        print("šŸ“ Logs: logs/ directory")
        print("=" * 50)

        # Create manager
        print("šŸŽÆ Initializing collector...")
        manager = ProductionManager("config/okx_config.json")

        # Create collectors
        if not await manager.create_collectors():
            print("āŒ Failed to create collectors")
            return False

        # Start data collection
        print("šŸš€ Starting data collection...")
        if not await manager.start():
            print("āŒ Failed to start data collection")
            return False

        # Running status
        start_time = time.time()
        print("āœ… Data collection active!")
        print(f"šŸ“ˆ Collecting: {len(manager.collectors)} trading pairs")
        print("šŸ“Š Monitor: python scripts/monitor_clean.py")
        if not duration_hours:
            print("ā¹ļø  Stop: Ctrl+C")
        print("-" * 50)

        # Main monitoring loop
        last_update = time.time()
        update_interval = 600  # Update every 10 minutes
        # Hoist the duration conversion out of the 1-second polling loop
        duration_seconds = int(duration_hours * 3600) if duration_hours else None

        while not shutdown_event.is_set():
            # Wait for shutdown or timeout
            try:
                await asyncio.wait_for(shutdown_event.wait(), timeout=1.0)
                break
            except asyncio.TimeoutError:
                pass

            # Check duration if specified
            current_time = time.time()
            if duration_seconds is not None and current_time - start_time >= duration_seconds:
                print(f"ā° Completed {duration_hours} hour run")
                break

            # Periodic status update
            if current_time - last_update >= update_interval:
                elapsed_hours = (current_time - start_time) / 3600
                if duration_hours:
                    remaining_hours = duration_hours - elapsed_hours
                    print(f"ā±ļø  Runtime: {elapsed_hours:.1f}h | Remaining: {remaining_hours:.1f}h")
                else:
                    print(f"ā±ļø  Runtime: {elapsed_hours:.1f}h | Mode: Continuous")
                last_update = current_time

        # Final summary
        total_runtime = (time.time() - start_time) / 3600
        print("\nšŸ“Š COLLECTION COMPLETE")
        print(f"ā±ļø  Total runtime: {total_runtime:.2f} hours")
        print(f"šŸ“ˆ Collectors: {len(manager.collectors)} active")
        print("šŸ“‹ View results: python scripts/monitor_clean.py")

        return True

    except Exception as e:
        print(f"āŒ Error: {e}")
        return False

    finally:
        if manager:
            print("šŸ›‘ Stopping collectors...")
            await manager.stop()
            print("āœ… Shutdown complete")


def main():
    """Main entry point: parse CLI args and run the async collector."""
    parser = argparse.ArgumentParser(
        description="Clean Production OKX Data Collector",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run indefinitely (until stopped with Ctrl+C)
  python scripts/production_clean.py

  # Run for 8 hours
  python scripts/production_clean.py --hours 8

  # Run overnight (12 hours)
  python scripts/production_clean.py --hours 12
        """
    )

    parser.add_argument(
        '--hours',
        type=float,
        default=None,
        help='Collection duration in hours (default: indefinite until stopped manually)'
    )

    args = parser.parse_args()

    if args.hours is not None and args.hours <= 0:
        print("āŒ Duration must be positive")
        sys.exit(1)

    try:
        success = asyncio.run(run_clean_production(args.hours))
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        print("\nšŸ‘‹ Interrupted by user")
        sys.exit(0)
    except Exception as e:
        print(f"āŒ Fatal error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()