2025-06-02 14:23:08 +08:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
Data Collection Service
|
|
|
|
|
|
|
|
|
|
Production-ready service for cryptocurrency market data collection
|
|
|
|
|
with clean logging and robust error handling.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import signal
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import List, Optional, Dict, Any
|
|
|
|
|
import logging
|
Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.
These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00
|
|
|
import json
|
2025-06-02 14:23:08 +08:00
|
|
|
|
|
|
|
|
# Add project root to path
|
|
|
|
|
project_root = Path(__file__).parent.parent
|
|
|
|
|
sys.path.insert(0, str(project_root))
|
|
|
|
|
|
|
|
|
|
# Set environment for clean production logging
|
|
|
|
|
import os
|
|
|
|
|
os.environ['DEBUG'] = 'false'
|
|
|
|
|
|
2025-06-10 12:55:27 +08:00
|
|
|
# Suppress verbose SQLAlchemy logging
|
2025-06-02 14:23:08 +08:00
|
|
|
logging.getLogger('sqlalchemy').setLevel(logging.WARNING)
|
|
|
|
|
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
|
|
|
|
|
logging.getLogger('sqlalchemy.pool').setLevel(logging.WARNING)
|
|
|
|
|
logging.getLogger('sqlalchemy.dialects').setLevel(logging.WARNING)
|
|
|
|
|
logging.getLogger('sqlalchemy.orm').setLevel(logging.WARNING)
|
|
|
|
|
|
Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.
These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00
|
|
|
from .collector_manager import CollectorManager
|
|
|
|
|
from config.collector_service_config import CollectorServiceConfig
|
|
|
|
|
from .collector_factory import CollectorFactory
|
2025-06-02 14:23:08 +08:00
|
|
|
from database.connection import init_database
|
|
|
|
|
from utils.logger import get_logger
|
Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.
These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00
|
|
|
from utils.async_task_manager import TaskManager
|
2025-06-02 14:23:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class DataCollectionService:
|
2025-06-10 12:55:27 +08:00
|
|
|
"""Production data collection service with modular architecture."""
|
2025-06-02 14:23:08 +08:00
|
|
|
|
|
|
|
|
def __init__(self, config_path: str = "config/data_collection.json"):
|
|
|
|
|
"""Initialize the data collection service."""
|
|
|
|
|
self.config_path = config_path
|
2025-06-10 12:55:27 +08:00
|
|
|
self.logger = get_logger("data_collection_service", log_level="INFO", verbose=False)
|
2025-06-02 14:23:08 +08:00
|
|
|
|
2025-06-10 12:55:27 +08:00
|
|
|
# Initialize configuration and factory
|
Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.
These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00
|
|
|
self.service_config = CollectorServiceConfig(config_path, logger=self.logger)
|
2025-06-10 12:55:27 +08:00
|
|
|
self.config = self.service_config.load_config()
|
|
|
|
|
self.collector_factory = CollectorFactory(logger=self.logger)
|
2025-06-02 14:23:08 +08:00
|
|
|
|
|
|
|
|
# Core components
|
Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.
These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00
|
|
|
self.task_manager = TaskManager("data_collection_service", logger=self.logger)
|
2025-06-10 12:55:27 +08:00
|
|
|
self.collector_manager = CollectorManager(logger=self.logger, log_errors_only=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
self.collectors: List = []
|
|
|
|
|
|
|
|
|
|
# Service state
|
|
|
|
|
self.running = False
|
|
|
|
|
self.start_time = None
|
|
|
|
|
self.shutdown_event = asyncio.Event()
|
|
|
|
|
|
|
|
|
|
# Statistics for monitoring
|
|
|
|
|
self.stats = {
|
|
|
|
|
'collectors_created': 0,
|
|
|
|
|
'collectors_running': 0,
|
|
|
|
|
'total_uptime_seconds': 0,
|
|
|
|
|
'last_activity': None,
|
|
|
|
|
'errors_count': 0
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.logger.info("🚀 Data Collection Service initialized")
|
|
|
|
|
self.logger.info(f"📁 Configuration: {config_path}")
|
|
|
|
|
|
2025-06-10 13:12:13 +08:00
|
|
|
def _sanitize_error(self, message: str) -> str:
|
|
|
|
|
"""
|
|
|
|
|
Sanitize error message to prevent leaking internal details.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
message: Original error message
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Sanitized error message
|
|
|
|
|
"""
|
|
|
|
|
# Remove sensitive patterns that might leak internal information
|
|
|
|
|
sensitive_patterns = [
|
|
|
|
|
'password=',
|
|
|
|
|
'token=',
|
|
|
|
|
'key=',
|
|
|
|
|
'secret=',
|
|
|
|
|
'auth=',
|
|
|
|
|
'api_key=',
|
|
|
|
|
'api_secret=',
|
|
|
|
|
'access_token=',
|
|
|
|
|
'refresh_token='
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
sanitized = message
|
|
|
|
|
for pattern in sensitive_patterns:
|
|
|
|
|
if pattern.lower() in sanitized.lower():
|
|
|
|
|
# Replace the value part after = with [REDACTED]
|
|
|
|
|
parts = sanitized.split(pattern)
|
|
|
|
|
if len(parts) > 1:
|
|
|
|
|
# Find the end of the value (space, comma, or end of string)
|
|
|
|
|
value_part = parts[1]
|
|
|
|
|
end_chars = [' ', ',', ')', ']', '}', '\n', '\t']
|
|
|
|
|
end_idx = len(value_part)
|
|
|
|
|
|
|
|
|
|
for char in end_chars:
|
|
|
|
|
char_idx = value_part.find(char)
|
|
|
|
|
if char_idx != -1 and char_idx < end_idx:
|
|
|
|
|
end_idx = char_idx
|
|
|
|
|
|
|
|
|
|
# Replace the value with [REDACTED]
|
|
|
|
|
sanitized = parts[0] + pattern + '[REDACTED]' + value_part[end_idx:]
|
|
|
|
|
|
|
|
|
|
return sanitized
|
|
|
|
|
|
2025-06-02 14:23:08 +08:00
|
|
|
async def initialize_collectors(self) -> bool:
|
|
|
|
|
"""Initialize all data collectors based on configuration."""
|
|
|
|
|
try:
|
2025-06-10 12:55:27 +08:00
|
|
|
collectors = await self.collector_factory.create_collectors_from_config(self.config)
|
2025-06-02 14:23:08 +08:00
|
|
|
|
2025-06-10 12:55:27 +08:00
|
|
|
if not collectors:
|
2025-06-10 13:12:13 +08:00
|
|
|
self.logger.error("❌ No collectors were successfully created", exc_info=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
return False
|
|
|
|
|
|
2025-06-10 12:55:27 +08:00
|
|
|
for collector in collectors:
|
2025-06-02 14:23:08 +08:00
|
|
|
self.collector_manager.add_collector(collector)
|
|
|
|
|
self.collectors.append(collector)
|
2025-06-10 12:55:27 +08:00
|
|
|
|
|
|
|
|
self.stats['collectors_created'] = len(collectors)
|
|
|
|
|
self.logger.info(f"✅ Successfully initialized {len(collectors)} data collectors")
|
|
|
|
|
return True
|
2025-06-02 14:23:08 +08:00
|
|
|
|
2025-06-10 13:12:13 +08:00
|
|
|
except (KeyError, AttributeError, TypeError) as e:
|
|
|
|
|
# Handle configuration and data structure errors
|
|
|
|
|
sanitized_message = self._sanitize_error(f"❌ Configuration error initializing collectors: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
|
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
|
|
|
|
except (ConnectionError, OSError, IOError) as e:
|
|
|
|
|
# Handle connection and I/O related errors
|
|
|
|
|
sanitized_message = self._sanitize_error(f"❌ Connection/IO error initializing collectors: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
|
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
2025-06-02 14:23:08 +08:00
|
|
|
except Exception as e:
|
2025-06-10 13:12:13 +08:00
|
|
|
# Catch any other unexpected errors
|
|
|
|
|
sanitized_message = self._sanitize_error(f"❌ Unexpected error initializing collectors: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
2025-06-10 12:55:27 +08:00
|
|
|
self.stats['errors_count'] += 1
|
2025-06-02 14:23:08 +08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
async def start(self) -> bool:
|
|
|
|
|
"""Start the data collection service."""
|
|
|
|
|
try:
|
|
|
|
|
self.start_time = time.time()
|
|
|
|
|
self.running = True
|
|
|
|
|
|
|
|
|
|
self.logger.info("🚀 Starting Data Collection Service...")
|
|
|
|
|
|
|
|
|
|
self.logger.info("📊 Initializing database connection...")
|
|
|
|
|
init_database()
|
|
|
|
|
self.logger.info("✅ Database connection established")
|
|
|
|
|
|
|
|
|
|
# Start collector manager
|
|
|
|
|
self.logger.info("🔌 Starting data collectors...")
|
|
|
|
|
success = await self.collector_manager.start()
|
|
|
|
|
|
|
|
|
|
if success:
|
|
|
|
|
self.stats['collectors_running'] = len(self.collectors)
|
|
|
|
|
self.stats['last_activity'] = datetime.now()
|
|
|
|
|
|
|
|
|
|
self.logger.info("✅ Data Collection Service started successfully")
|
|
|
|
|
self.logger.info(f"📈 Active collectors: {self.stats['collectors_running']}")
|
|
|
|
|
return True
|
|
|
|
|
else:
|
2025-06-10 13:12:13 +08:00
|
|
|
self.logger.error("Failed to start data collectors", exc_info=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
|
|
|
|
|
2025-06-10 13:12:13 +08:00
|
|
|
except (ConnectionError, OSError, IOError) as e:
|
|
|
|
|
# Handle database and connection errors
|
|
|
|
|
sanitized_message = self._sanitize_error(f"Database/Connection error starting service: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
|
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
|
|
|
|
except (AttributeError, TypeError, ValueError) as e:
|
|
|
|
|
# Handle configuration and data validation errors
|
|
|
|
|
sanitized_message = self._sanitize_error(f"❌ Configuration error starting service: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
|
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
2025-06-02 14:23:08 +08:00
|
|
|
except Exception as e:
|
2025-06-10 13:12:13 +08:00
|
|
|
# Catch any other unexpected errors
|
|
|
|
|
sanitized_message = self._sanitize_error(f"❌ Unexpected error starting service: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
async def stop(self) -> None:
|
|
|
|
|
"""Stop the data collection service gracefully."""
|
|
|
|
|
try:
|
|
|
|
|
self.logger.info("🛑 Stopping Data Collection Service...")
|
|
|
|
|
self.running = False
|
|
|
|
|
|
|
|
|
|
# Stop all collectors
|
|
|
|
|
await self.collector_manager.stop()
|
|
|
|
|
|
|
|
|
|
# Update statistics
|
|
|
|
|
if self.start_time:
|
|
|
|
|
self.stats['total_uptime_seconds'] = time.time() - self.start_time
|
|
|
|
|
|
|
|
|
|
self.stats['collectors_running'] = 0
|
|
|
|
|
|
|
|
|
|
self.logger.info("✅ Data Collection Service stopped gracefully")
|
|
|
|
|
self.logger.info(f"📊 Total uptime: {self.stats['total_uptime_seconds']:.1f} seconds")
|
|
|
|
|
|
2025-06-10 13:12:13 +08:00
|
|
|
except (asyncio.CancelledError, KeyboardInterrupt):
|
|
|
|
|
# Handle graceful shutdown scenarios
|
|
|
|
|
self.logger.warning("Service shutdown was interrupted")
|
|
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
except (ConnectionError, OSError, IOError) as e:
|
|
|
|
|
# Handle connection and I/O related errors during shutdown
|
|
|
|
|
sanitized_message = self._sanitize_error(f"Connection/IO error during service shutdown: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
|
|
|
|
self.stats['errors_count'] += 1
|
2025-06-02 14:23:08 +08:00
|
|
|
except Exception as e:
|
2025-06-10 13:12:13 +08:00
|
|
|
# Catch any other unexpected errors during shutdown
|
|
|
|
|
sanitized_message = self._sanitize_error(f"Unexpected error during service shutdown: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
self.stats['errors_count'] += 1
|
Implement data collection architecture with modular components
- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability.
- Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling.
- Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices.
- Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management.
- Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors.
- Updated imports across the codebase to reflect the new structure, ensuring consistent access to components.
These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
2025-06-10 13:40:28 +08:00
|
|
|
finally:
|
|
|
|
|
# Always cleanup task manager
|
|
|
|
|
await self.task_manager.shutdown(graceful=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
|
|
|
|
|
def get_status(self) -> Dict[str, Any]:
|
|
|
|
|
"""Get current service status."""
|
|
|
|
|
current_time = time.time()
|
|
|
|
|
uptime = current_time - self.start_time if self.start_time else 0
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
'running': self.running,
|
|
|
|
|
'uptime_seconds': uptime,
|
|
|
|
|
'uptime_hours': uptime / 3600,
|
|
|
|
|
'collectors_total': len(self.collectors),
|
|
|
|
|
'collectors_running': self.stats['collectors_running'],
|
|
|
|
|
'errors_count': self.stats['errors_count'],
|
|
|
|
|
'last_activity': self.stats['last_activity'],
|
|
|
|
|
'start_time': datetime.fromtimestamp(self.start_time) if self.start_time else None
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def setup_signal_handlers(self) -> None:
|
|
|
|
|
"""Setup signal handlers for graceful shutdown."""
|
|
|
|
|
def signal_handler(signum, frame):
|
|
|
|
|
self.logger.info(f"📡 Received shutdown signal ({signum}), stopping gracefully...")
|
|
|
|
|
self.shutdown_event.set()
|
|
|
|
|
|
|
|
|
|
signal.signal(signal.SIGINT, signal_handler)
|
|
|
|
|
signal.signal(signal.SIGTERM, signal_handler)
|
|
|
|
|
|
|
|
|
|
async def run(self, duration_hours: Optional[float] = None) -> bool:
|
|
|
|
|
"""
|
|
|
|
|
Run the data collection service.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
duration_hours: Optional duration to run (None = indefinite)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
bool: True if successful, False if error occurred
|
|
|
|
|
"""
|
|
|
|
|
self.setup_signal_handlers()
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
# Initialize collectors
|
|
|
|
|
if not await self.initialize_collectors():
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
# Start service
|
|
|
|
|
if not await self.start():
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
# Service running notification
|
|
|
|
|
status = self.get_status()
|
|
|
|
|
if duration_hours:
|
|
|
|
|
self.logger.info(f"⏱️ Service will run for {duration_hours} hours")
|
|
|
|
|
else:
|
|
|
|
|
self.logger.info("⏱️ Service running indefinitely (until stopped)")
|
|
|
|
|
|
|
|
|
|
self.logger.info(f"📊 Active collectors: {status['collectors_running']}")
|
|
|
|
|
self.logger.info("🔍 Monitor with: python scripts/monitor_clean.py")
|
|
|
|
|
|
|
|
|
|
# Main service loop
|
|
|
|
|
update_interval = 600 # Status update every 10 minutes
|
|
|
|
|
last_update = time.time()
|
|
|
|
|
|
|
|
|
|
while not self.shutdown_event.is_set():
|
|
|
|
|
# Wait for shutdown signal or timeout
|
|
|
|
|
try:
|
|
|
|
|
await asyncio.wait_for(self.shutdown_event.wait(), timeout=1.0)
|
|
|
|
|
break
|
|
|
|
|
except asyncio.TimeoutError:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
current_time = time.time()
|
|
|
|
|
|
|
|
|
|
# Check duration limit
|
2025-06-10 12:55:27 +08:00
|
|
|
if duration_hours and self.start_time:
|
2025-06-02 14:23:08 +08:00
|
|
|
elapsed_hours = (current_time - self.start_time) / 3600
|
|
|
|
|
if elapsed_hours >= duration_hours:
|
|
|
|
|
self.logger.info(f"⏰ Completed {duration_hours} hour run")
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
# Periodic status update
|
|
|
|
|
if current_time - last_update >= update_interval:
|
2025-06-10 12:55:27 +08:00
|
|
|
if self.start_time:
|
|
|
|
|
elapsed_hours = (current_time - self.start_time) / 3600
|
|
|
|
|
self.logger.info(f"⏱️ Service uptime: {elapsed_hours:.1f} hours")
|
2025-06-02 14:23:08 +08:00
|
|
|
last_update = current_time
|
|
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
2025-06-10 13:12:13 +08:00
|
|
|
except (asyncio.CancelledError, KeyboardInterrupt):
|
|
|
|
|
# Handle graceful shutdown scenarios
|
|
|
|
|
self.logger.info("Service run was cancelled gracefully")
|
|
|
|
|
return True
|
|
|
|
|
except (asyncio.TimeoutError, ConnectionError, OSError, IOError) as e:
|
|
|
|
|
# Handle timeout, connection and I/O related errors
|
|
|
|
|
sanitized_message = self._sanitize_error(f"Connection/Timeout error during service run: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
|
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
2025-06-02 14:23:08 +08:00
|
|
|
except Exception as e:
|
2025-06-10 13:12:13 +08:00
|
|
|
# Catch any other unexpected errors
|
|
|
|
|
sanitized_message = self._sanitize_error(f"Unexpected service error: {e}")
|
|
|
|
|
self.logger.error(sanitized_message, exc_info=True)
|
2025-06-02 14:23:08 +08:00
|
|
|
self.stats['errors_count'] += 1
|
|
|
|
|
return False
|
|
|
|
|
finally:
|
|
|
|
|
await self.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Service entry point function
|
|
|
|
|
async def run_data_collection_service(
|
|
|
|
|
config_path: str = "config/data_collection.json",
|
|
|
|
|
duration_hours: Optional[float] = None
|
|
|
|
|
) -> bool:
|
2025-06-10 12:55:27 +08:00
|
|
|
"""Run the data collection service."""
|
2025-06-02 14:23:08 +08:00
|
|
|
service = DataCollectionService(config_path)
|
|
|
|
|
return await service.run(duration_hours)
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
import argparse
|
|
|
|
|
|
|
|
|
|
parser = argparse.ArgumentParser(description="Data Collection Service")
|
2025-06-10 12:55:27 +08:00
|
|
|
parser.add_argument("--config", default="config/data_collection.json", help="Configuration file path")
|
|
|
|
|
parser.add_argument("--duration", type=float, help="Duration to run in hours (default: indefinite)")
|
2025-06-02 14:23:08 +08:00
|
|
|
|
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
2025-06-10 12:55:27 +08:00
|
|
|
# Run service
|
|
|
|
|
asyncio.run(run_data_collection_service(args.config, args.duration))
|