#!/usr/bin/env python3
"""
Clean Production OKX Data Collector

This script runs OKX data collection with minimal console output
and comprehensive file logging for production use.

Usage:
    python scripts/production_clean.py [--hours duration]

Examples:
    # Run for 8 hours
    python scripts/production_clean.py --hours 8

    # Run overnight (12 hours)
    python scripts/production_clean.py --hours 12
"""
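# The script expects a JSON config shaped roughly like the sketch below.
# This shape is inferred from the fields this script actually reads
# ('trading_pairs', 'symbol', 'enabled', 'data_types', 'data_collection');
# the symbol and values shown are illustrative, not the canonical file.
#
# config/okx_config.json:
# {
#     "trading_pairs": [
#         {"symbol": "BTC-USDT", "enabled": true, "data_types": ["trade"]}
#     ],
#     "data_collection": {
#         "auto_restart": true,
#         "health_check_interval": 30.0,
#         "store_raw_data": true
#     }
# }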
import argparse
import asyncio
import json
import logging
import os
import signal
import sys
import time
from pathlib import Path
from typing import List

# Add the project root to sys.path so project packages resolve
# regardless of where the script is launched from.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Disable SQLAlchemy echo before the project modules below are imported,
# so the database layer starts up quiet in production.
os.environ['DEBUG'] = 'false'

# Suppress SQLAlchemy's verbose loggers globally for production.
for _logger_name in ('sqlalchemy', 'sqlalchemy.engine', 'sqlalchemy.pool',
                     'sqlalchemy.dialects', 'sqlalchemy.orm'):
    logging.getLogger(_logger_name).setLevel(logging.CRITICAL)

from data.exchanges.okx import OKXCollector
from data.exchanges.okx.data_processor import OKXDataProcessor
from data.collector_manager import CollectorManager
from data.base_collector import DataType
from data.common import CandleProcessingConfig
from database.connection import init_database
from utils.logger import get_logger


class ProductionManager:
    """Production manager for OKX data collection."""

    def __init__(self, config_path: str = "config/okx_config.json"):
        self.config_path = config_path
        self.config = self._load_config()

        # Clean logging: minimal console output, detailed file logs
        self.logger = get_logger("production_manager", verbose=False)

        # Core components
        self.collector_manager = CollectorManager()
        self.collectors: List[OKXCollector] = []

        # Runtime state
        self.running = False
        self.start_time = None
        self.statistics = {
            'collectors_created': 0,
            'uptime_seconds': 0
        }

        self.logger.info("🚀 Production Manager initialized")
        self.logger.info(f"📁 Config: {config_path}")
    def _load_config(self) -> dict:
        """Load configuration from the JSON config file."""
        try:
            with open(self.config_path, 'r') as f:
                return json.load(f)
        except Exception as e:
            print(f"❌ Failed to load config from {self.config_path}: {e}")
            sys.exit(1)
    async def create_collectors(self) -> bool:
        """Create collectors for all enabled trading pairs."""
        try:
            enabled_pairs = [
                pair for pair in self.config['trading_pairs']
                if pair.get('enabled', True)
            ]

            self.logger.info(f"🎯 Creating collectors for {len(enabled_pairs)} trading pairs...")

            for pair_config in enabled_pairs:
                symbol = pair_config['symbol']
                data_types = [DataType(dt) for dt in pair_config.get('data_types', ['trade'])]

                self.logger.info(
                    f"📈 Creating collector for {symbol} "
                    f"with data types: {[dt.value for dt in data_types]}"
                )

                # Custom candle processing config for 1m and 5m timeframes.
                # Note: 1s timeframes are not supported by the aggregation framework.
                candle_config = CandleProcessingConfig(
                    timeframes=['1m', '5m'],
                    emit_incomplete_candles=False,  # only complete candles
                    auto_save_candles=True
                )

                # Custom data processor with the 1m/5m timeframes
                data_processor = OKXDataProcessor(
                    symbol=symbol,
                    config=candle_config,
                    component_name=f"okx_processor_{symbol.replace('-', '_').lower()}"
                )

                # OKX collector that will host the custom processor
                collector = OKXCollector(
                    symbol=symbol,
                    data_types=data_types,
                    component_name=f"okx_collector_{symbol.replace('-', '_').lower()}",
                    auto_restart=self.config.get('data_collection', {}).get('auto_restart', True),
                    health_check_interval=self.config.get('data_collection', {}).get('health_check_interval', 30.0),
                    store_raw_data=self.config.get('data_collection', {}).get('store_raw_data', True)
                )

                # Replace the default data processor with the custom one and
                # re-wire the processed-data callbacks to the new instance.
                collector._data_processor = data_processor
                data_processor.add_trade_callback(collector._on_trade_processed)
                data_processor.add_candle_callback(collector._on_candle_processed)

                # Register with the manager
                self.collector_manager.add_collector(collector)
                self.collectors.append(collector)
                self.statistics['collectors_created'] += 1

                self.logger.info(f"✅ Collector created for {symbol} with 1m/5m timeframes")

            self.logger.info(f"🎉 All {len(self.collectors)} collectors created successfully")
            self.logger.info("📊 Collectors configured with 1m and 5m aggregation timeframes")
            return True

        except Exception as e:
            self.logger.error(f"❌ Failed to create collectors: {e}")
            return False
    async def start(self) -> bool:
        """Start all collectors and begin data collection."""
        try:
            self.start_time = time.time()
            self.running = True

            self.logger.info("🚀 Starting production data collection...")

            # Initialize the global database managers
            self.logger.info("📊 Initializing database...")
            init_database()
            self.logger.info("✅ Database initialized successfully")

            # Start the collector manager
            success = await self.collector_manager.start()
            if not success:
                self.logger.error("❌ Failed to start collector manager")
                return False

            self.logger.info("✅ All collectors started successfully")
            self.logger.info("📊 Data collection is now active with built-in processing")
            return True

        except Exception as e:
            self.logger.error(f"❌ Failed to start collectors: {e}")
            return False
    async def stop(self) -> None:
        """Stop all collectors gracefully."""
        try:
            self.logger.info("🛑 Stopping production data collection...")
            self.running = False

            # Stop the collector manager
            await self.collector_manager.stop()

            self.logger.info("✅ All collectors stopped gracefully")

        except Exception as e:
            self.logger.error(f"❌ Error during shutdown: {e}")
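

# Programmatic use (illustrative sketch, using only the methods defined
# above; not part of the original CLI flow):
#
#     manager = ProductionManager("config/okx_config.json")
#     if await manager.create_collectors() and await manager.start():
#         ...  # collect for as long as needed
#         await manager.stop()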


async def run_clean_production(duration_hours: float = 8.0):
    """Run the production collector with clean console output."""

    duration_seconds = int(duration_hours * 3600)

    # Shared state for signal handling (captured by the handler closure)
    shutdown_event = asyncio.Event()
    manager = None

    def signal_handler(signum, frame):
        print("\n📡 Shutdown signal received, stopping gracefully...")
        shutdown_event.set()

    # Install signal handlers. signal.signal works here because the handler
    # runs on the main thread; on Unix event loops, loop.add_signal_handler
    # is the asyncio-native alternative.
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        # Header
        print("🚀 OKX PRODUCTION DATA COLLECTOR")
        print("=" * 50)
        print(f"⏱️ Duration: {duration_hours} hours")
        print("📊 Timeframes: 1m and 5m candles")
        print("💾 Database: Raw trades + aggregated candles")
        print("📝 Logs: logs/ directory")
        print("=" * 50)

        # Create the manager
        print("🎯 Initializing collector...")
        manager = ProductionManager("config/okx_config.json")

        # Create collectors
        if not await manager.create_collectors():
            print("❌ Failed to create collectors")
            return False

        # Start data collection
        print("🚀 Starting data collection...")
        if not await manager.start():
            print("❌ Failed to start data collection")
            return False

        # Running status
        start_time = time.time()
        print("✅ Data collection active!")
        print(f"📈 Collecting: {len(manager.collectors)} trading pairs")
        print("📊 Monitor: python scripts/monitor_clean.py")
        print("-" * 50)

        # Main monitoring loop
        last_update = time.time()
        update_interval = 600  # status update every 10 minutes

        while not shutdown_event.is_set():
            # Wake up once per second so the duration and status checks
            # stay responsive while waiting for a shutdown signal.
            try:
                await asyncio.wait_for(shutdown_event.wait(), timeout=1.0)
                break
            except asyncio.TimeoutError:
                pass

            # Check duration
            current_time = time.time()
            if current_time - start_time >= duration_seconds:
                print(f"⏰ Completed {duration_hours} hour run")
                break

            # Periodic status update
            if current_time - last_update >= update_interval:
                elapsed_hours = (current_time - start_time) / 3600
                remaining_hours = duration_hours - elapsed_hours
                print(f"⏱️ Runtime: {elapsed_hours:.1f}h | Remaining: {remaining_hours:.1f}h")
                last_update = current_time

        # Final summary
        total_runtime = (time.time() - start_time) / 3600
        print("\n📊 COLLECTION COMPLETE")
        print(f"⏱️ Total runtime: {total_runtime:.2f} hours")
        print(f"📈 Collectors: {len(manager.collectors)} active")
        print("📋 View results: python scripts/monitor_clean.py")

        return True

    except Exception as e:
        print(f"❌ Error: {e}")
        return False

    finally:
        if manager:
            print("🛑 Stopping collectors...")
            await manager.stop()
            print("✅ Shutdown complete")


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Clean Production OKX Data Collector",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run for 8 hours
  python scripts/production_clean.py --hours 8

  # Run overnight (12 hours)
  python scripts/production_clean.py --hours 12
"""
    )

    parser.add_argument(
        '--hours',
        type=float,
        default=8.0,
        help='Collection duration in hours (default: 8.0)'
    )

    args = parser.parse_args()

    if args.hours <= 0:
        print("❌ Duration must be positive")
        sys.exit(1)

    try:
        success = asyncio.run(run_clean_production(args.hours))
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        print("\n👋 Interrupted by user")
        sys.exit(0)
    except Exception as e:
        print(f"❌ Fatal error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()