Add complete time series aggregation example and refactor OKXCollector for repository pattern

- Introduced `example_complete_series_aggregation.py` to demonstrate time series aggregation, emitting candles even when no trades occur.
- Implemented `CompleteSeriesProcessor` extending `RealTimeCandleProcessor` to handle time-based candle emission and empty candle creation.
- Refactored `OKXCollector` to use the new repository pattern for database operations, improving modularity and maintainability.
- Updated database operations to centralize data handling through `DatabaseOperations`, improving error handling and logging.
- Enhanced documentation to include details on the new aggregation example and repository pattern implementation, ensuring clarity for users.
This commit is contained in:
Vasily.onl
2025-06-02 13:27:01 +08:00
parent 5b4547edd5
commit cffc54b648
11 changed files with 1460 additions and 149 deletions

View File

@@ -76,32 +76,49 @@ class CleanMonitor:
MarketData.created_at >= cutoff
).scalar()
# Timeframe breakdown
# Timeframe breakdown with improved sorting
timeframes = session.query(
MarketData.timeframe,
func.count(MarketData.id)
).group_by(MarketData.timeframe).all()
# Latest prices
# Latest prices - prioritize shorter timeframes for more recent data
latest_prices = {}
for symbol in ['BTC-USDT', 'ETH-USDT']:
latest = session.query(MarketData).filter(
MarketData.symbol == symbol,
MarketData.timeframe == '1m'
).order_by(desc(MarketData.created_at)).first()
# Try to get latest price from shortest available timeframe
price_timeframes = ['5s', '1s', '1m', '5m', '15m', '1h'] # Prefer shorter timeframes
latest = None
for tf in price_timeframes:
latest = session.query(MarketData).filter(
MarketData.symbol == symbol,
MarketData.timeframe == tf
).order_by(desc(MarketData.created_at)).first()
if latest:
break # Use first available timeframe
if latest:
latest_prices[symbol] = {
'price': float(latest.close),
'time': latest.timestamp
'time': latest.timestamp,
'timeframe': latest.timeframe
}
# Second-based activity monitoring (last 1 minute for high-frequency data)
recent_cutoff_1min = datetime.now(timezone.utc) - timedelta(minutes=1)
recent_second_candles = session.query(func.count(MarketData.id)).filter(
MarketData.created_at >= recent_cutoff_1min,
MarketData.timeframe.in_(['1s', '5s', '10s', '15s', '30s'])
).scalar()
return {
'raw_count': raw_count,
'candle_count': candle_count,
'raw_timespan': (raw_newest - raw_oldest).total_seconds() / 3600 if raw_oldest and raw_newest else 0,
'recent_raw': recent_raw,
'recent_candles': recent_candles,
'recent_second_candles': recent_second_candles,
'timeframes': dict(timeframes),
'latest_prices': latest_prices
}
@@ -110,6 +127,25 @@ class CleanMonitor:
self.logger.error(f"Error getting stats: {e}")
return {}
def _sort_timeframes(self, timeframes: dict) -> dict:
"""Sort timeframes logically: seconds -> minutes -> hours -> days."""
def timeframe_sort_key(tf):
"""Generate sort key for timeframe."""
import re
match = re.match(r'^(\d+)([smhd])$', tf.lower())
if not match:
return (999, 999) # Unknown formats last
number = int(match.group(1))
unit = match.group(2)
# Unit priority: s=0, m=1, h=2, d=3
unit_priority = {'s': 0, 'm': 1, 'h': 2, 'd': 3}.get(unit, 999)
return (unit_priority, number)
sorted_items = sorted(timeframes.items(), key=lambda x: timeframe_sort_key(x[0]))
return dict(sorted_items)
def print_status(self):
"""Print clean status summary."""
stats = self.get_summary_stats()
@@ -128,27 +164,53 @@ class CleanMonitor:
print(f"📈 Raw Data: {raw_count:,} entries ({timespan:.1f} hours)")
# Candle breakdown
# Candle breakdown with improved sorting and formatting
timeframes = stats.get('timeframes', {})
if timeframes:
tf_summary = ", ".join([f"{tf}:{count}" for tf, count in timeframes.items()])
print(f"📊 Candles: {candle_count:,} total ({tf_summary})")
sorted_timeframes = self._sort_timeframes(timeframes)
# Group by type for better display
second_tfs = {k: v for k, v in sorted_timeframes.items() if k.endswith('s')}
minute_tfs = {k: v for k, v in sorted_timeframes.items() if k.endswith('m')}
hour_tfs = {k: v for k, v in sorted_timeframes.items() if k.endswith('h')}
day_tfs = {k: v for k, v in sorted_timeframes.items() if k.endswith('d')}
# Build display string
tf_parts = []
if second_tfs:
tf_parts.append(" ".join([f"{tf}:{count}" for tf, count in second_tfs.items()]))
if minute_tfs:
tf_parts.append(" ".join([f"{tf}:{count}" for tf, count in minute_tfs.items()]))
if hour_tfs:
tf_parts.append(" ".join([f"{tf}:{count}" for tf, count in hour_tfs.items()]))
if day_tfs:
tf_parts.append(" ".join([f"{tf}:{count}" for tf, count in day_tfs.items()]))
tf_summary = " | ".join(tf_parts)
print(f"📊 Candles: {candle_count:,} total")
print(f" {tf_summary}")
else:
print(f"📊 Candles: {candle_count:,} total")
# Recent activity
# Enhanced recent activity with second-based monitoring
recent_raw = stats.get('recent_raw', 0)
recent_candles = stats.get('recent_candles', 0)
print(f"🕐 Recent (5m): {recent_raw:,} raw, {recent_candles} candles")
recent_second_candles = stats.get('recent_second_candles', 0)
# Latest prices
print(f"🕐 Recent Activity:")
print(f" 5m: {recent_raw:,} raw trades, {recent_candles} total candles")
if recent_second_candles > 0:
print(f" 1m: {recent_second_candles} second-based candles (1s-30s)")
# Latest prices with timeframe information
latest_prices = stats.get('latest_prices', {})
if latest_prices:
print("💰 Latest Prices:")
for symbol, data in latest_prices.items():
price = data['price']
time_str = data['time'].strftime('%H:%M:%S')
print(f" {symbol}: ${price:,.2f} at {time_str}")
timeframe = data.get('timeframe', '1m')
print(f" {symbol}: ${price:,.2f} at {time_str} ({timeframe})")
print("="*50)

View File

@@ -100,12 +100,14 @@ class ProductionManager:
symbol = pair_config['symbol']
data_types = [DataType(dt) for dt in pair_config.get('data_types', ['trade'])]
self.logger.info(f"📈 Creating collector for {symbol} with data types: {[dt.value for dt in data_types]}")
# Get timeframes from config file for this trading pair
config_timeframes = pair_config.get('timeframes', ['1m', '5m'])
# Create custom candle processing config for 1m and 5m timeframes
# Note: 1s timeframes are not supported by the aggregation framework
self.logger.info(f"📈 Creating collector for {symbol} with timeframes: {config_timeframes}")
# Create custom candle processing config using timeframes from config
candle_config = CandleProcessingConfig(
timeframes=['1m', '5m'],
timeframes=config_timeframes,
emit_incomplete_candles=False, # Only complete candles
auto_save_candles=True
)
@@ -142,10 +144,14 @@ class ProductionManager:
self.collectors.append(collector)
self.statistics['collectors_created'] += 1
self.logger.info(f"✅ Collector created for {symbol} with 1m/5m timeframes and error-only logging")
self.logger.info(f"✅ Collector created for {symbol} with {'/'.join(config_timeframes)} timeframes")
self.logger.info(f"🎉 All {len(self.collectors)} collectors created successfully with error-only logging")
self.logger.info(f"📊 Collectors configured with 1m and 5m aggregation timeframes")
self.logger.info(f"🎉 All {len(self.collectors)} collectors created successfully")
# Get unique timeframes across all collectors for summary
all_timeframes = set()
for pair in enabled_pairs:
all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
self.logger.info(f"📊 Collectors configured with timeframes: {', '.join(sorted(all_timeframes))}")
return True
except Exception as e:
@@ -210,6 +216,20 @@ async def run_clean_production(duration_hours: Optional[float] = None):
signal.signal(signal.SIGTERM, signal_handler)
try:
# Read config to show actual timeframes in banner
config_path = "config/okx_config.json"
try:
with open(config_path, 'r') as f:
config = json.load(f)
# Get unique timeframes from all enabled trading pairs
all_timeframes = set()
for pair in config.get('trading_pairs', []):
if pair.get('enabled', True):
all_timeframes.update(pair.get('timeframes', ['1m', '5m']))
timeframes_str = ', '.join(sorted(all_timeframes))
except:
timeframes_str = "configured timeframes"
# Header
print("🚀 OKX PRODUCTION DATA COLLECTOR")
print("="*50)
@@ -217,7 +237,7 @@ async def run_clean_production(duration_hours: Optional[float] = None):
print(f"⏱️ Duration: {duration_hours} hours")
else:
print(f"⏱️ Duration: Indefinite (until stopped)")
print(f"📊 Timeframes: 1m and 5m candles")
print(f"📊 Timeframes: {timeframes_str}")
print(f"💾 Database: Raw trades + aggregated candles")
print(f"📝 Logs: logs/ directory")
print("="*50)