Add complete time series aggregation example and refactor OKXCollector for repository pattern
- Introduced `example_complete_series_aggregation.py` to demonstrate time series aggregation, emitting candles even when no trades occur.
- Implemented `CompleteSeriesProcessor` extending `RealTimeCandleProcessor` to handle time-based candle emission and empty candle creation.
- Refactored `OKXCollector` to utilize the new repository pattern for database operations, enhancing modularity and maintainability.
- Updated database operations to centralize data handling through `DatabaseOperations`, improving error handling and logging.
- Enhanced documentation to include details on the new aggregation example and repository pattern implementation, ensuring clarity for users.
This commit is contained in:
@@ -76,32 +76,49 @@ class CleanMonitor:
|
||||
MarketData.created_at >= cutoff
|
||||
).scalar()
|
||||
|
||||
# Timeframe breakdown
|
||||
# Timeframe breakdown with improved sorting
|
||||
timeframes = session.query(
|
||||
MarketData.timeframe,
|
||||
func.count(MarketData.id)
|
||||
).group_by(MarketData.timeframe).all()
|
||||
|
||||
# Latest prices
|
||||
# Latest prices - prioritize shorter timeframes for more recent data
|
||||
latest_prices = {}
|
||||
for symbol in ['BTC-USDT', 'ETH-USDT']:
|
||||
latest = session.query(MarketData).filter(
|
||||
MarketData.symbol == symbol,
|
||||
MarketData.timeframe == '1m'
|
||||
).order_by(desc(MarketData.created_at)).first()
|
||||
# Try to get latest price from shortest available timeframe
|
||||
price_timeframes = ['5s', '1s', '1m', '5m', '15m', '1h'] # Prefer shorter timeframes
|
||||
latest = None
|
||||
|
||||
for tf in price_timeframes:
|
||||
latest = session.query(MarketData).filter(
|
||||
MarketData.symbol == symbol,
|
||||
MarketData.timeframe == tf
|
||||
).order_by(desc(MarketData.created_at)).first()
|
||||
|
||||
if latest:
|
||||
break # Use first available timeframe
|
||||
|
||||
if latest:
|
||||
latest_prices[symbol] = {
|
||||
'price': float(latest.close),
|
||||
'time': latest.timestamp
|
||||
'time': latest.timestamp,
|
||||
'timeframe': latest.timeframe
|
||||
}
|
||||
|
||||
# Second-based activity monitoring (last 1 minute for high-frequency data)
|
||||
recent_cutoff_1min = datetime.now(timezone.utc) - timedelta(minutes=1)
|
||||
recent_second_candles = session.query(func.count(MarketData.id)).filter(
|
||||
MarketData.created_at >= recent_cutoff_1min,
|
||||
MarketData.timeframe.in_(['1s', '5s', '10s', '15s', '30s'])
|
||||
).scalar()
|
||||
|
||||
return {
|
||||
'raw_count': raw_count,
|
||||
'candle_count': candle_count,
|
||||
'raw_timespan': (raw_newest - raw_oldest).total_seconds() / 3600 if raw_oldest and raw_newest else 0,
|
||||
'recent_raw': recent_raw,
|
||||
'recent_candles': recent_candles,
|
||||
'recent_second_candles': recent_second_candles,
|
||||
'timeframes': dict(timeframes),
|
||||
'latest_prices': latest_prices
|
||||
}
|
||||
@@ -110,6 +127,25 @@ class CleanMonitor:
|
||||
self.logger.error(f"Error getting stats: {e}")
|
||||
return {}
|
||||
|
||||
def _sort_timeframes(self, timeframes: dict) -> dict:
|
||||
"""Sort timeframes logically: seconds -> minutes -> hours -> days."""
|
||||
def timeframe_sort_key(tf):
|
||||
"""Generate sort key for timeframe."""
|
||||
import re
|
||||
match = re.match(r'^(\d+)([smhd])$', tf.lower())
|
||||
if not match:
|
||||
return (999, 999) # Unknown formats last
|
||||
|
||||
number = int(match.group(1))
|
||||
unit = match.group(2)
|
||||
|
||||
# Unit priority: s=0, m=1, h=2, d=3
|
||||
unit_priority = {'s': 0, 'm': 1, 'h': 2, 'd': 3}.get(unit, 999)
|
||||
return (unit_priority, number)
|
||||
|
||||
sorted_items = sorted(timeframes.items(), key=lambda x: timeframe_sort_key(x[0]))
|
||||
return dict(sorted_items)
|
||||
|
||||
def print_status(self):
|
||||
"""Print clean status summary."""
|
||||
stats = self.get_summary_stats()
|
||||
@@ -128,27 +164,53 @@ class CleanMonitor:
|
||||
|
||||
print(f"📈 Raw Data: {raw_count:,} entries ({timespan:.1f} hours)")
|
||||
|
||||
# Candle breakdown
|
||||
# Candle breakdown with improved sorting and formatting
|
||||
timeframes = stats.get('timeframes', {})
|
||||
if timeframes:
|
||||
tf_summary = ", ".join([f"{tf}:{count}" for tf, count in timeframes.items()])
|
||||
print(f"📊 Candles: {candle_count:,} total ({tf_summary})")
|
||||
sorted_timeframes = self._sort_timeframes(timeframes)
|
||||
|
||||
# Group by type for better display
|
||||
second_tfs = {k: v for k, v in sorted_timeframes.items() if k.endswith('s')}
|
||||
minute_tfs = {k: v for k, v in sorted_timeframes.items() if k.endswith('m')}
|
||||
hour_tfs = {k: v for k, v in sorted_timeframes.items() if k.endswith('h')}
|
||||
day_tfs = {k: v for k, v in sorted_timeframes.items() if k.endswith('d')}
|
||||
|
||||
# Build display string
|
||||
tf_parts = []
|
||||
if second_tfs:
|
||||
tf_parts.append(" ".join([f"{tf}:{count}" for tf, count in second_tfs.items()]))
|
||||
if minute_tfs:
|
||||
tf_parts.append(" ".join([f"{tf}:{count}" for tf, count in minute_tfs.items()]))
|
||||
if hour_tfs:
|
||||
tf_parts.append(" ".join([f"{tf}:{count}" for tf, count in hour_tfs.items()]))
|
||||
if day_tfs:
|
||||
tf_parts.append(" ".join([f"{tf}:{count}" for tf, count in day_tfs.items()]))
|
||||
|
||||
tf_summary = " | ".join(tf_parts)
|
||||
print(f"📊 Candles: {candle_count:,} total")
|
||||
print(f" {tf_summary}")
|
||||
else:
|
||||
print(f"📊 Candles: {candle_count:,} total")
|
||||
|
||||
# Recent activity
|
||||
# Enhanced recent activity with second-based monitoring
|
||||
recent_raw = stats.get('recent_raw', 0)
|
||||
recent_candles = stats.get('recent_candles', 0)
|
||||
print(f"🕐 Recent (5m): {recent_raw:,} raw, {recent_candles} candles")
|
||||
recent_second_candles = stats.get('recent_second_candles', 0)
|
||||
|
||||
# Latest prices
|
||||
print(f"🕐 Recent Activity:")
|
||||
print(f" 5m: {recent_raw:,} raw trades, {recent_candles} total candles")
|
||||
if recent_second_candles > 0:
|
||||
print(f" 1m: {recent_second_candles} second-based candles (1s-30s)")
|
||||
|
||||
# Latest prices with timeframe information
|
||||
latest_prices = stats.get('latest_prices', {})
|
||||
if latest_prices:
|
||||
print("💰 Latest Prices:")
|
||||
for symbol, data in latest_prices.items():
|
||||
price = data['price']
|
||||
time_str = data['time'].strftime('%H:%M:%S')
|
||||
print(f" {symbol}: ${price:,.2f} at {time_str}")
|
||||
timeframe = data.get('timeframe', '1m')
|
||||
print(f" {symbol}: ${price:,.2f} at {time_str} ({timeframe})")
|
||||
|
||||
print("="*50)
|
||||
|
||||
|
||||
@@ -100,12 +100,14 @@ class ProductionManager:
|
||||
symbol = pair_config['symbol']
|
||||
data_types = [DataType(dt) for dt in pair_config.get('data_types', ['trade'])]
|
||||
|
||||
self.logger.info(f"📈 Creating collector for {symbol} with data types: {[dt.value for dt in data_types]}")
|
||||
# Get timeframes from config file for this trading pair
|
||||
config_timeframes = pair_config.get('timeframes', ['1m', '5m'])
|
||||
|
||||
# Create custom candle processing config for 1m and 5m timeframes
|
||||
# Note: 1s timeframes are not supported by the aggregation framework
|
||||
self.logger.info(f"📈 Creating collector for {symbol} with timeframes: {config_timeframes}")
|
||||
|
||||
# Create custom candle processing config using timeframes from config
|
||||
candle_config = CandleProcessingConfig(
|
||||
timeframes=['1m', '5m'],
|
||||
timeframes=config_timeframes,
|
||||
emit_incomplete_candles=False, # Only complete candles
|
||||
auto_save_candles=True
|
||||
)
|
||||
@@ -142,10 +144,14 @@ class ProductionManager:
|
||||
self.collectors.append(collector)
|
||||
self.statistics['collectors_created'] += 1
|
||||
|
||||
self.logger.info(f"✅ Collector created for {symbol} with 1m/5m timeframes and error-only logging")
|
||||
self.logger.info(f"✅ Collector created for {symbol} with {'/'.join(config_timeframes)} timeframes")
|
||||
|
||||
self.logger.info(f"🎉 All {len(self.collectors)} collectors created successfully with error-only logging")
|
||||
self.logger.info(f"📊 Collectors configured with 1m and 5m aggregation timeframes")
|
||||
self.logger.info(f"🎉 All {len(self.collectors)} collectors created successfully")
|
||||
# Get unique timeframes across all collectors for summary
|
||||
all_timeframes = set()
|
||||
for pair in enabled_pairs:
|
||||
all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
|
||||
self.logger.info(f"📊 Collectors configured with timeframes: {', '.join(sorted(all_timeframes))}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -210,6 +216,20 @@ async def run_clean_production(duration_hours: Optional[float] = None):
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
try:
|
||||
# Read config to show actual timeframes in banner
|
||||
config_path = "config/okx_config.json"
|
||||
try:
|
||||
with open(config_path, 'r') as f:
|
||||
config = json.load(f)
|
||||
# Get unique timeframes from all enabled trading pairs
|
||||
all_timeframes = set()
|
||||
for pair in config.get('trading_pairs', []):
|
||||
if pair.get('enabled', True):
|
||||
all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
|
||||
timeframes_str = ', '.join(sorted(all_timeframes))
|
||||
except:
|
||||
timeframes_str = "configured timeframes"
|
||||
|
||||
# Header
|
||||
print("🚀 OKX PRODUCTION DATA COLLECTOR")
|
||||
print("="*50)
|
||||
@@ -217,7 +237,7 @@ async def run_clean_production(duration_hours: Optional[float] = None):
|
||||
print(f"⏱️ Duration: {duration_hours} hours")
|
||||
else:
|
||||
print(f"⏱️ Duration: Indefinite (until stopped)")
|
||||
print(f"📊 Timeframes: 1m and 5m candles")
|
||||
print(f"📊 Timeframes: {timeframes_str}")
|
||||
print(f"💾 Database: Raw trades + aggregated candles")
|
||||
print(f"📝 Logs: logs/ directory")
|
||||
print("="*50)
|
||||
|
||||
Reference in New Issue
Block a user