# Commit summary:
# - Introduced `BaseDataCollector` and `CollectorManager` classes for standardized data collection and centralized management.
# - Added health monitoring features, including auto-restart capabilities and detailed status reporting for collectors.
# - Updated `env.template` to include new logging and health check configurations.
# - Enhanced documentation in `docs/data_collectors.md` to provide comprehensive guidance on the new data collection system.
# - Added unit tests for `BaseDataCollector` and `CollectorManager` to ensure reliability and functionality.
#
# 412 lines | 15 KiB | Python
"""
|
||
Demonstration of running multiple data collectors in parallel.
|
||
|
||
This example shows how to set up and manage multiple collectors simultaneously,
|
||
each collecting data from different exchanges or different symbols.
|
||
"""
|
||
|
||
import asyncio
import traceback
from datetime import datetime, timezone
from typing import Dict, Any

from data import (
    BaseDataCollector, DataType, CollectorStatus, MarketDataPoint,
    CollectorManager, CollectorConfig
)


class DemoExchangeCollector(BaseDataCollector):
    """Demo collector simulating different exchanges.

    No real network traffic is involved: connecting, disconnecting and
    subscribing are emulated with short ``asyncio.sleep`` calls, and ticker
    prices are derived from ``base_price`` and a running message counter.
    """

    def __init__(self,
                 exchange_name: str,
                 symbols: list,
                 message_interval: float = 1.0,
                 base_price: float = 50000):
        """
        Initialize demo collector.

        Args:
            exchange_name: Name of the exchange (okx, binance, coinbase, etc.)
            symbols: Trading symbols to collect
            message_interval: Seconds between simulated messages
            base_price: Base price for simulation
        """
        super().__init__(exchange_name, symbols, [DataType.TICKER])
        self.message_interval = message_interval
        self.base_price = base_price
        self.connected = False
        self.subscribed = False
        self.message_count = 0

    async def connect(self) -> bool:
        """Simulate connection to exchange.

        Returns:
            Always ``True``; the simulated connection cannot fail.
        """
        print(f"🔌 [{self.exchange_name.upper()}] Connecting...")
        await asyncio.sleep(0.2)  # Simulate connection delay
        self.connected = True
        print(f"✅ [{self.exchange_name.upper()}] Connected successfully")
        return True

    async def disconnect(self) -> None:
        """Simulate disconnection from exchange and clear both state flags."""
        print(f"🔌 [{self.exchange_name.upper()}] Disconnecting...")
        await asyncio.sleep(0.1)
        self.connected = False
        self.subscribed = False
        print(f"❌ [{self.exchange_name.upper()}] Disconnected")

    async def subscribe_to_data(self, symbols: list, data_types: list) -> bool:
        """Simulate subscription to data streams.

        Args:
            symbols: Symbols to subscribe to (only the count is reported).
            data_types: Requested data types (unused by the simulation).

        Returns:
            ``False`` if the collector is not connected, ``True`` otherwise.
        """
        if not self.connected:
            return False

        print(f"📡 [{self.exchange_name.upper()}] Subscribing to {len(symbols)} symbols")
        await asyncio.sleep(0.1)
        self.subscribed = True
        return True

    async def unsubscribe_from_data(self, symbols: list, data_types: list) -> bool:
        """Simulate unsubscription from data streams.

        Both arguments are accepted for interface compatibility and ignored.

        Returns:
            Always ``True``.
        """
        print(f"📡 [{self.exchange_name.upper()}] Unsubscribing from data streams")
        self.subscribed = False
        return True

    async def _process_message(self, message: Any) -> MarketDataPoint:
        """Process simulated market data message.

        Args:
            message: Mapping with a required ``'symbol'`` key and an optional
                ``'volume'`` key.

        Returns:
            A ticker ``MarketDataPoint`` for the message's symbol.

        Raises:
            KeyError: If ``message`` has no ``'symbol'`` entry.
        """
        self.message_count += 1

        # Repeating pattern around base_price: offsets run from -500 to +490
        # in steps of 10 as the counter advances.
        price_variation = (self.message_count % 100 - 50) * 10
        current_price = self.base_price + price_variation

        # Read the clock once so the point's timestamp and the ISO string in
        # the payload always describe the same instant.
        now = datetime.now(timezone.utc)

        return MarketDataPoint(
            exchange=self.exchange_name,
            symbol=message['symbol'],
            timestamp=now,
            data_type=DataType.TICKER,
            data={
                'price': current_price,
                'volume': message.get('volume', 1.0 + (self.message_count % 10) * 0.1),
                'bid': current_price - 0.5,
                'ask': current_price + 0.5,
                'timestamp': now.isoformat(),
            },
        )

    async def _handle_messages(self) -> None:
        """Simulate receiving and processing messages.

        Emits one simulated ticker per configured symbol, then sleeps for
        ``message_interval`` seconds. When the collector is not both connected
        and subscribed it only sleeps briefly and returns.

        Relies on ``self.symbols``, ``self._stats``, ``self.logger`` and
        ``self._notify_callbacks``, which are not defined in this class and
        are presumably supplied by ``BaseDataCollector``.

        Raises:
            Exception: Any error raised while processing a symbol is logged
                and re-raised unchanged.
        """
        if not self.connected or not self.subscribed:
            await asyncio.sleep(0.1)
            return

        # Process each symbol
        for symbol in self.symbols:
            try:
                # Create simulated message
                simulated_message = {
                    'symbol': symbol,
                    'volume': 1.5 + (self.message_count % 5) * 0.2,
                }

                # Process the message
                data_point = await self._process_message(simulated_message)
                if data_point:
                    self._stats['messages_processed'] += 1
                    await self._notify_callbacks(data_point)

            except Exception as e:
                self.logger.error(f"Error processing message for {symbol}: {e}")
                # Bare raise keeps the original traceback intact.
                raise

        # Wait before next batch of messages
        await asyncio.sleep(self.message_interval)


def create_data_callback(exchange_name: str):
    """Build a printing callback bound to one exchange name.

    Args:
        exchange_name: Label shown (upper-cased) at the start of each line.

    Returns:
        A function that takes a data point and prints a single line with the
        exchange, symbol, price, volume and the point's time of day. Missing
        ``'price'`` or ``'volume'`` entries are shown as 0.
    """

    def data_callback(data_point: MarketDataPoint):
        payload = data_point.data
        price = payload.get('price', 0)
        volume = payload.get('volume', 0)
        clock = data_point.timestamp.strftime('%H:%M:%S')

        line = (
            f"📊 {exchange_name.upper():8} | {data_point.symbol:10} | "
            f"${price:8.2f} | "
            f"Vol: {volume:.2f} | "
            f"{clock}"
        )
        print(line)

    return data_callback


async def demo_parallel_collectors():
    """Demonstrate running multiple collectors in parallel.

    Registers four ``DemoExchangeCollector`` instances with one
    ``CollectorManager``, starts the manager, prints periodic status lines
    for 30 seconds, prints a final per-collector report and stops the
    manager. Returns early (without monitoring) if the manager fails to
    start. Once started, the manager is stopped even if monitoring or
    reporting raises.
    """
    print("=" * 80)
    print("🚀 PARALLEL COLLECTORS DEMONSTRATION")
    print("=" * 80)
    print("Running multiple exchange collectors simultaneously...")
    print()

    # Create manager
    manager = CollectorManager(
        "parallel_demo_manager",
        global_health_check_interval=10.0  # Check every 10 seconds
    )

    # Define exchange configurations
    exchange_configs = [
        {
            'name': 'okx',
            'symbols': ['BTC-USDT', 'ETH-USDT'],
            'interval': 1.0,
            'base_price': 45000
        },
        {
            'name': 'binance',
            'symbols': ['BTC-USDT', 'ETH-USDT', 'SOL-USDT'],
            'interval': 1.5,
            'base_price': 45100
        },
        {
            'name': 'coinbase',
            'symbols': ['BTC-USD', 'ETH-USD'],
            'interval': 2.0,
            'base_price': 44900
        },
        {
            'name': 'kraken',
            'symbols': ['XBTUSD', 'ETHUSD'],
            'interval': 1.2,
            'base_price': 45050
        }
    ]

    # Create and configure collectors
    for config in exchange_configs:
        # Create collector
        collector = DemoExchangeCollector(
            exchange_name=config['name'],
            symbols=config['symbols'],
            message_interval=config['interval'],
            base_price=config['base_price']
        )

        # Add data callback
        callback = create_data_callback(config['name'])
        collector.add_data_callback(DataType.TICKER, callback)

        # Add to manager with configuration
        collector_config = CollectorConfig(
            name=f"{config['name']}_collector",
            exchange=config['name'],
            symbols=config['symbols'],
            data_types=['ticker'],
            auto_restart=True,
            health_check_interval=15.0,
            enabled=True
        )

        manager.add_collector(collector, collector_config)
        print(f"➕ Added {config['name'].upper()} collector with {len(config['symbols'])} symbols")

    print(f"\n📝 Total collectors added: {len(manager.list_collectors())}")
    print()

    # Start all collectors in parallel
    print("🏁 Starting all collectors...")
    # Inside a coroutine the running loop is the right clock source.
    loop = asyncio.get_running_loop()
    start_time = loop.time()

    success = await manager.start()
    if not success:
        print("❌ Failed to start collector manager")
        return

    try:
        startup_time = loop.time() - start_time
        print(f"✅ All collectors started in {startup_time:.2f} seconds")
        print()

        print("📊 DATA STREAM (All exchanges running in parallel):")
        print("-" * 80)

        # Monitor for a period
        monitoring_duration = 30  # seconds
        status_interval = 10  # seconds between status updates
        for elapsed in range(1, monitoring_duration + 1):
            await asyncio.sleep(1)

            # `elapsed` counts completed sleeps, so the label matches the time
            # actually waited. The last tick is skipped because the final
            # report follows immediately.
            if elapsed % status_interval == 0 and elapsed < monitoring_duration:
                status = manager.get_status()
                print()
                print(f"⏰ STATUS UPDATE ({elapsed}s):")
                print(f"   Running collectors: {len(manager.get_running_collectors())}")
                print(f"   Failed collectors: {len(manager.get_failed_collectors())}")
                print(f"   Total restarts: {status['statistics']['restarts_performed']}")
                print("-" * 80)

        # Final status report
        print()
        print("📈 FINAL STATUS REPORT:")
        print("=" * 80)

        status = manager.get_status()
        print(f"Manager Status: {status['manager_status']}")
        print(f"Total Collectors: {status['total_collectors']}")
        print(f"Running Collectors: {len(manager.get_running_collectors())}")
        print(f"Failed Collectors: {len(manager.get_failed_collectors())}")
        print(f"Total Restarts: {status['statistics']['restarts_performed']}")

        # Individual collector statistics
        print("\n📊 INDIVIDUAL COLLECTOR STATS:")
        for collector_name in manager.list_collectors():
            collector_status = manager.get_collector_status(collector_name)
            if collector_status:
                stats = collector_status['status']['statistics']
                health = collector_status['health']

                print(f"\n{collector_name.upper()}:")
                print(f"  Status: {collector_status['status']['status']}")
                print(f"  Messages Processed: {stats['messages_processed']}")
                print(f"  Uptime: {stats.get('uptime_seconds', 0):.1f}s")
                print(f"  Errors: {stats['errors']}")
                print(f"  Healthy: {health['is_healthy']}")
    finally:
        # Stop all collectors, even when monitoring or reporting failed.
        print("\n🛑 Stopping all collectors...")
        await manager.stop()

    print("✅ All collectors stopped successfully")


async def demo_dynamic_management():
    """Demonstrate dynamic addition/removal of collectors.

    Starts a manager with one collector, registers two more while it is
    running, then disables and re-enables the second listed collector before
    stopping the manager. Returns early if the manager fails to start. Once
    started, the manager is stopped even if a later step raises.
    """
    print("\n" + "=" * 80)
    print("🔄 DYNAMIC COLLECTOR MANAGEMENT")
    print("=" * 80)

    manager = CollectorManager("dynamic_manager")

    # Start with one collector
    collector1 = DemoExchangeCollector("exchange_a", ["BTC-USDT"], 1.0)
    collector1.add_data_callback(DataType.TICKER, create_data_callback("exchange_a"))
    manager.add_collector(collector1)

    # Check the start result, as demo_parallel_collectors does.
    if not await manager.start():
        print("❌ Failed to start collector manager")
        return

    try:
        print("✅ Started with 1 collector")
        await asyncio.sleep(3)

        # Add second collector while system is running
        # NOTE(review): whether the manager starts late-added collectors on
        # its own is not visible here -- confirm against CollectorManager.
        collector2 = DemoExchangeCollector("exchange_b", ["ETH-USDT"], 1.5)
        collector2.add_data_callback(DataType.TICKER, create_data_callback("exchange_b"))
        manager.add_collector(collector2)

        print("➕ Added second collector while running")
        await asyncio.sleep(3)

        # Add third collector
        collector3 = DemoExchangeCollector("exchange_c", ["SOL-USDT"], 2.0)
        collector3.add_data_callback(DataType.TICKER, create_data_callback("exchange_c"))
        manager.add_collector(collector3)

        print("➕ Added third collector")
        await asyncio.sleep(5)

        # Show current status
        print(f"\n📊 Current Status: {len(manager.get_running_collectors())} collectors running")

        # Disable one collector
        collectors = manager.list_collectors()
        if len(collectors) > 1:
            manager.disable_collector(collectors[1])
            print(f"⏸️  Disabled collector: {collectors[1]}")

        await asyncio.sleep(3)

        # Re-enable
        if len(collectors) > 1:
            manager.enable_collector(collectors[1])
            print(f"▶️  Re-enabled collector: {collectors[1]}")

        await asyncio.sleep(3)

        print(f"\n📊 Final Status: {len(manager.get_running_collectors())} collectors running")
    finally:
        # Always release the manager, even if a step above failed.
        await manager.stop()

    print("✅ Dynamic management demo complete")


async def demo_performance_monitoring():
    """Demonstrate performance monitoring across multiple collectors.

    Runs three collectors with different message intervals and prints a
    per-collector snapshot (message rate, totals, errors, health) every
    5 seconds for 20 seconds. Returns early if the manager fails to start.
    Once started, the manager is stopped even if a snapshot raises.
    """
    print("\n" + "=" * 80)
    print("📈 PERFORMANCE MONITORING")
    print("=" * 80)

    manager = CollectorManager("performance_monitor", global_health_check_interval=5.0)

    # Create collectors with different performance characteristics
    configs = [
        ("fast_exchange", ["BTC-USDT"], 0.5),    # Fast updates
        ("medium_exchange", ["ETH-USDT"], 1.0),  # Medium updates
        ("slow_exchange", ["SOL-USDT"], 2.0),    # Slow updates
    ]

    for exchange, symbols, interval in configs:
        collector = DemoExchangeCollector(exchange, symbols, interval)
        collector.add_data_callback(DataType.TICKER, create_data_callback(exchange))
        manager.add_collector(collector)

    # Check the start result, as demo_parallel_collectors does.
    if not await manager.start():
        print("❌ Failed to start collector manager")
        return

    snapshot_interval = 5  # seconds between snapshots
    snapshot_count = 4     # 4 snapshots x 5 s = 20 s of monitoring

    try:
        print("✅ Started performance monitoring demo")

        # Monitor performance for 20 seconds
        for i in range(snapshot_count):
            await asyncio.sleep(snapshot_interval)

            print(f"\n📊 PERFORMANCE SNAPSHOT ({(i+1)*snapshot_interval}s):")
            print("-" * 60)

            for collector_name in manager.list_collectors():
                status = manager.get_collector_status(collector_name)
                if status:
                    stats = status['status']['statistics']
                    health = status['health']

                    # Clamp uptime to at least 1 s to avoid dividing by zero
                    # (or by a tiny value) right after startup.
                    msg_rate = stats['messages_processed'] / max(stats.get('uptime_seconds', 1), 1)

                    print(f"{collector_name:15} | "
                          f"Rate: {msg_rate:5.1f}/s | "
                          f"Total: {stats['messages_processed']:4d} | "
                          f"Errors: {stats['errors']:2d} | "
                          f"Health: {'✅' if health['is_healthy'] else '❌'}")
    finally:
        # Always release the manager, even if a snapshot failed.
        await manager.stop()

    print("\n✅ Performance monitoring demo complete")


async def main():
    """Run all parallel collector demonstrations.

    Runs the three demos in sequence and prints a closing summary. Any
    ``Exception`` raised by a demo is reported with its traceback instead of
    propagating, so the remaining demos are skipped but the script exits
    normally.
    """
    print("🎯 MULTIPLE COLLECTORS PARALLEL EXECUTION DEMO")
    print("This demonstration shows the CollectorManager running multiple collectors simultaneously\n")

    try:
        # Main parallel demo
        await demo_parallel_collectors()

        # Dynamic management demo
        await demo_dynamic_management()

        # Performance monitoring demo
        await demo_performance_monitoring()

        print("\n" + "=" * 80)
        print("🎉 ALL PARALLEL EXECUTION DEMOS COMPLETED!")
        print("=" * 80)
        print("\nKey takeaways:")
        print("✅ Multiple collectors run truly in parallel")
        print("✅ Each collector operates independently")
        print("✅ Collectors can be added/removed while system is running")
        print("✅ Centralized health monitoring across all collectors")
        print("✅ Individual performance tracking per collector")
        print("✅ Coordinated lifecycle management")

    except Exception as e:
        # Top-level boundary of the demo script: report and carry on to exit.
        print(f"❌ Demo failed with error: {e}")
        traceback.print_exc()


# Run the demos only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())