- Introduced `example_complete_series_aggregation.py` to demonstrate time series aggregation, emitting candles even when no trades occur. - Implemented `CompleteSeriesProcessor` extending `RealTimeCandleProcessor` to handle time-based candle emission and empty candle creation. - Refactored `OKXCollector` to utilize the new repository pattern for database operations, enhancing modularity and maintainability. - Updated database operations to centralize data handling through `DatabaseOperations`, improving error handling and logging. - Enhanced documentation to include details on the new aggregation example and repository pattern implementation, ensuring clarity for users.
362 lines · 13 KiB · Python
#!/usr/bin/env python3
"""
Clean Production OKX Data Collector

This script runs OKX data collection with minimal console output
and comprehensive file logging for production use.

Usage:
    python scripts/production_clean.py [--hours duration]

Examples:
    # Run for 8 hours
    python scripts/production_clean.py --hours 8

    # Run overnight (12 hours)
    python scripts/production_clean.py --hours 12
"""

import argparse
import asyncio
import json
import logging
import os
import signal
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import List, Optional

# Add project root to path so project-local packages resolve when the
# script is run directly (e.g. `python scripts/production_clean.py`).
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Disable SQLAlchemy echo for clean production output.
# NOTE: must be set BEFORE the project modules below are imported,
# since they read it at import time.
os.environ['DEBUG'] = 'false'

# Suppress SQLAlchemy verbose logging globally for production.
for _sqlalchemy_logger in (
    'sqlalchemy',
    'sqlalchemy.engine',
    'sqlalchemy.pool',
    'sqlalchemy.dialects',
    'sqlalchemy.orm',
):
    logging.getLogger(_sqlalchemy_logger).setLevel(logging.CRITICAL)

# Project imports — intentionally placed after the sys.path / environment
# setup above, which they depend on.
from data.exchanges.okx import OKXCollector
from data.exchanges.okx.data_processor import OKXDataProcessor
from data.collector_manager import CollectorManager
from data.base_collector import DataType
from data.common import CandleProcessingConfig
from database.connection import init_database
from utils.logger import get_logger
|
|
|
|
|
|
class ProductionManager:
    """Production manager for OKX data collection.

    Loads trading-pair configuration from a JSON file, builds one
    ``OKXCollector`` (with a custom ``OKXDataProcessor``) per enabled
    pair, and drives them through a shared ``CollectorManager``.
    """

    def __init__(self, config_path: str = "config/okx_config.json"):
        """Initialize the manager and load configuration.

        Args:
            config_path: Path to the JSON configuration file. A load
                failure terminates the process (see ``_load_config``).
        """
        self.config_path = config_path
        self.config = self._load_config()

        # Configure clean logging - minimal console output, error-only file logs
        self.logger = get_logger("production_manager", verbose=False)

        # Core components with error-only logging
        self.collector_manager = CollectorManager(logger=self.logger, log_errors_only=True)
        self.collectors: List[OKXCollector] = []

        # Runtime state
        self.running = False
        self.start_time: Optional[float] = None
        self.statistics = {
            'collectors_created': 0,
            'uptime_seconds': 0
        }

        self.logger.info("🚀 Production Manager initialized with error-only logging")
        self.logger.info(f"📁 Config: {config_path}")

    def _load_config(self) -> dict:
        """Load configuration from the JSON file at ``self.config_path``.

        Exits the process with status 1 on a missing/unreadable file or
        invalid JSON — nothing useful can run without configuration.
        """
        try:
            with open(self.config_path, 'r') as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            # Narrow exception types: anything else (e.g. a programming
            # error) should surface as a traceback, not a config message.
            print(f"❌ Failed to load config from {self.config_path}: {e}")
            sys.exit(1)

    async def create_collectors(self) -> bool:
        """Create collectors for all enabled trading pairs.

        Returns:
            True if every enabled pair got a collector, False on error.
        """
        try:
            enabled_pairs = [
                pair for pair in self.config['trading_pairs']
                if pair.get('enabled', True)
            ]

            self.logger.info(f"🎯 Creating collectors for {len(enabled_pairs)} trading pairs...")

            for pair_config in enabled_pairs:
                symbol = pair_config['symbol']
                data_types = [DataType(dt) for dt in pair_config.get('data_types', ['trade'])]

                # Get timeframes from config file for this trading pair
                config_timeframes = pair_config.get('timeframes', ['1m', '5m'])

                self.logger.info(f"📈 Creating collector for {symbol} with timeframes: {config_timeframes}")

                # Create custom candle processing config using timeframes from config
                candle_config = CandleProcessingConfig(
                    timeframes=config_timeframes,
                    emit_incomplete_candles=False,  # Only complete candles
                    auto_save_candles=True
                )

                # Create custom data processor with error-only logging
                data_processor = OKXDataProcessor(
                    symbol=symbol,
                    config=candle_config,
                    component_name=f"okx_processor_{symbol.replace('-', '_').lower()}",
                    logger=self.logger
                )

                # Create OKX collector with error-only logging
                collector = OKXCollector(
                    symbol=symbol,
                    data_types=data_types,
                    component_name=f"okx_collector_{symbol.replace('-', '_').lower()}",
                    auto_restart=False,  # Disable auto-restart to prevent health check interference
                    health_check_interval=self.config.get('data_collection', {}).get('health_check_interval', 120.0),
                    store_raw_data=self.config.get('data_collection', {}).get('store_raw_data', True),
                    logger=self.logger,
                    log_errors_only=False  # Enable full logging temporarily to debug WebSocket issues
                )

                # Replace the default data processor with our custom one
                collector._data_processor = data_processor

                # Add callbacks so the collector receives processed data
                data_processor.add_trade_callback(collector._on_trade_processed)
                data_processor.add_candle_callback(collector._on_candle_processed)

                # Add to manager
                self.collector_manager.add_collector(collector)
                self.collectors.append(collector)
                self.statistics['collectors_created'] += 1

                self.logger.info(f"✅ Collector created for {symbol} with {'/'.join(config_timeframes)} timeframes")

            self.logger.info(f"🎉 All {len(self.collectors)} collectors created successfully")

            # Get unique timeframes across all collectors for summary
            all_timeframes = set()
            for pair in enabled_pairs:
                all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
            self.logger.info(f"📊 Collectors configured with timeframes: {', '.join(sorted(all_timeframes))}")
            return True

        except Exception as e:
            self.logger.error(f"❌ Failed to create collectors: {e}")
            return False

    async def start(self) -> bool:
        """Start all collectors and begin data collection.

        Initializes the database first, then starts the collector
        manager.

        Returns:
            True if collection started, False on any failure.
        """
        try:
            self.start_time = time.time()
            self.running = True

            self.logger.info("🚀 Starting production data collection...")

            # Initialize global database managers
            self.logger.info("📊 Initializing database...")
            init_database()
            self.logger.info("✅ Database initialized successfully")

            # Start collector manager
            success = await self.collector_manager.start()
            if not success:
                self.logger.error("❌ Failed to start collector manager")
                return False

            self.logger.info("✅ All collectors started successfully")
            self.logger.info("📊 Data collection is now active with built-in processing")
            return True

        except Exception as e:
            self.logger.error(f"❌ Failed to start collectors: {e}")
            return False

    async def stop(self) -> None:
        """Stop all collectors gracefully and record total uptime."""
        try:
            self.logger.info("🛑 Stopping production data collection...")
            self.running = False

            # Record total uptime so the statistic is meaningful after shutdown
            # (it was previously initialized but never updated).
            if self.start_time is not None:
                self.statistics['uptime_seconds'] = int(time.time() - self.start_time)

            # Stop collector manager
            await self.collector_manager.stop()

            self.logger.info("✅ All collectors stopped gracefully")

        except Exception as e:
            self.logger.error(f"❌ Error during shutdown: {e}")
|
|
|
|
|
|
async def run_clean_production(duration_hours: Optional[float] = None):
    """Run the production collector with clean console output.

    Args:
        duration_hours: How long to collect, in hours. ``None`` means run
            indefinitely until a shutdown signal (SIGINT/SIGTERM) arrives.

    Returns:
        True on a clean run, False if setup or collection failed.
    """
    # Global state for signal handling
    shutdown_event = asyncio.Event()
    manager = None

    def signal_handler(signum, frame):
        print("\n📡 Shutdown signal received, stopping gracefully...")
        shutdown_event.set()

    # Set up signal handlers for Ctrl+C and service termination
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        # Read config to show actual timeframes in banner
        config_path = "config/okx_config.json"
        try:
            with open(config_path, 'r') as f:
                config = json.load(f)
            # Get unique timeframes from all enabled trading pairs
            all_timeframes = set()
            for pair in config.get('trading_pairs', []):
                if pair.get('enabled', True):
                    all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
            timeframes_str = ', '.join(sorted(all_timeframes))
        except (OSError, json.JSONDecodeError, TypeError, AttributeError):
            # Banner text only — was a bare `except:` that also swallowed
            # KeyboardInterrupt/SystemExit. A detailed error is reported
            # later when the manager itself loads the config.
            timeframes_str = "configured timeframes"

        # Header
        print("🚀 OKX PRODUCTION DATA COLLECTOR")
        print("="*50)
        if duration_hours:
            print(f"⏱️ Duration: {duration_hours} hours")
        else:
            print("⏱️ Duration: Indefinite (until stopped)")
        print(f"📊 Timeframes: {timeframes_str}")
        print("💾 Database: Raw trades + aggregated candles")
        print("📝 Logs: logs/ directory")
        print("="*50)

        # Create manager
        print("🎯 Initializing collector...")
        manager = ProductionManager("config/okx_config.json")

        # Create collectors
        if not await manager.create_collectors():
            print("❌ Failed to create collectors")
            return False

        # Start data collection
        print("🚀 Starting data collection...")
        if not await manager.start():
            print("❌ Failed to start data collection")
            return False

        # Running status
        start_time = time.time()
        print("✅ Data collection active!")
        print(f"📈 Collecting: {len(manager.collectors)} trading pairs")
        print("📊 Monitor: python scripts/monitor_clean.py")
        if not duration_hours:
            print("⏹️ Stop: Ctrl+C")
        print("-" * 50)

        # Main monitoring loop
        last_update = time.time()
        update_interval = 600  # Status update every 10 minutes
        # Hoist the loop-invariant duration conversion out of the loop
        duration_seconds = int(duration_hours * 3600) if duration_hours else None

        while not shutdown_event.is_set():
            # Wake once per second so duration/status checks stay responsive
            try:
                await asyncio.wait_for(shutdown_event.wait(), timeout=1.0)
                break
            except asyncio.TimeoutError:
                pass

            current_time = time.time()

            # Check duration if specified
            if duration_seconds is not None and current_time - start_time >= duration_seconds:
                print(f"⏰ Completed {duration_hours} hour run")
                break

            # Periodic status update
            if current_time - last_update >= update_interval:
                elapsed_hours = (current_time - start_time) / 3600
                if duration_hours:
                    remaining_hours = duration_hours - elapsed_hours
                    print(f"⏱️ Runtime: {elapsed_hours:.1f}h | Remaining: {remaining_hours:.1f}h")
                else:
                    print(f"⏱️ Runtime: {elapsed_hours:.1f}h | Mode: Continuous")
                last_update = current_time

        # Final summary
        total_runtime = (time.time() - start_time) / 3600
        print("\n📊 COLLECTION COMPLETE")
        print(f"⏱️ Total runtime: {total_runtime:.2f} hours")
        print(f"📈 Collectors: {len(manager.collectors)} active")
        print("📋 View results: python scripts/monitor_clean.py")

        return True

    except Exception as e:
        print(f"❌ Error: {e}")
        return False

    finally:
        # Always attempt a graceful shutdown, even on error paths
        if manager:
            print("🛑 Stopping collectors...")
            await manager.stop()
            print("✅ Shutdown complete")
|
|
|
|
|
|
def main():
    """Parse command-line arguments and launch the production run."""
    arg_parser = argparse.ArgumentParser(
        description="Clean Production OKX Data Collector",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run indefinitely (until stopped with Ctrl+C)
  python scripts/production_clean.py

  # Run for 8 hours
  python scripts/production_clean.py --hours 8

  # Run overnight (12 hours)
  python scripts/production_clean.py --hours 12
"""
    )

    arg_parser.add_argument(
        '--hours',
        type=float,
        default=None,
        help='Collection duration in hours (default: indefinite until stopped manually)'
    )

    cli_args = arg_parser.parse_args()

    # Reject zero/negative durations up front; None means "run forever".
    if cli_args.hours is not None and cli_args.hours <= 0:
        print("❌ Duration must be positive")
        sys.exit(1)

    try:
        ok = asyncio.run(run_clean_production(cli_args.hours))
    except KeyboardInterrupt:
        print("\n👋 Interrupted by user")
        sys.exit(0)
    except Exception as e:
        print(f"❌ Fatal error: {e}")
        sys.exit(1)
    sys.exit(0 if ok else 1)
|
|
|
|
|
|
# Script entry point: only run the collector when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()