3. **Logger Inheritance**: Parent components pass their logger to child components
4. **Hierarchical Structure**: Log files are organized by component hierarchy

### Component Hierarchy

```
Top-level Application (individual logger)
├── ProductionManager (individual logger)
│   ├── DataSaver (receives logger from ProductionManager)
│   ├── DataValidator (receives logger from ProductionManager)
│   ├── DatabaseConnection (receives logger from ProductionManager)
│   └── CollectorManager (individual logger)
│       ├── OKX collector BTC-USD (individual logger)
│       │   ├── DataAggregator (receives logger from OKX collector)
│       │   ├── DataTransformer (receives logger from OKX collector)
│       │   └── DataProcessor (receives logger from OKX collector)
│       └── Another collector...
```
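
Wiring this hierarchy up is mostly a matter of who calls `get_logger` and who merely forwards. A minimal sketch (class names are taken from the tree above; the constructor details are assumptions based on this document, not verified signatures):

```python
from utils.logger import get_logger

# Top of a subtree: create an individual logger.
pm_logger = get_logger('production_manager')
production_manager = ProductionManager(logger=pm_logger)

# Inside ProductionManager, children do NOT create loggers; they receive
# pm_logger, so DataSaver/DataValidator/DatabaseConnection all write to
# logs/production_manager/ alongside their parent.
```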

### Usage Patterns

#### 1. No Logging

```python
class ComponentExample:
    def __init__(self, ..., logger=None, log_errors_only=False):
        self.logger = logger
        self.log_errors_only = log_errors_only

    def _log_debug(self, message: str) -> None:
        """Log debug message if logger is available and not in errors-only mode."""
        if self.logger and not self.log_errors_only:
            self.logger.debug(message)

    def _log_info(self, message: str) -> None:
        """Log info message if logger is available and not in errors-only mode."""
        if self.logger and not self.log_errors_only:
            self.logger.info(message)

    def _log_warning(self, message: str) -> None:
        """Log warning message if logger is available and not in errors-only mode."""
        if self.logger and not self.log_errors_only:
            self.logger.warning(message)

    def _log_error(self, message: str, exc_info: bool = False) -> None:
        """Log error message if logger is available (always logs errors)."""
        if self.logger:
            self.logger.error(message, exc_info=exc_info)

    def _log_critical(self, message: str, exc_info: bool = False) -> None:
        """Log critical message if logger is available (always logs critical)."""
        if self.logger:
            self.logger.critical(message, exc_info=exc_info)
```
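
In practice, business methods call these helpers rather than `self.logger` directly, so the no-logger, normal, and errors-only modes all behave correctly without `None` checks at call sites. A minimal sketch of such a call site (the `DataSaver` subclassing and the `_do_save` persistence call are hypothetical, for illustration only):

```python
class DataSaver(ComponentExample):  # hypothetical subclass for illustration
    def save_record(self, record) -> None:
        self._log_debug(f"Saving record {record!r}")  # suppressed in errors-only mode
        try:
            self._do_save(record)  # hypothetical persistence call
        except Exception:
            self._log_error("Failed to save record", exc_info=True)  # always logged
            raise
```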

#### Child Component Pattern

Child components receive the logger from their parent:

```python
class OKXCollector(BaseDataCollector):
    def __init__(self, symbol: str, logger=None, log_errors_only=False):
        super().__init__(..., logger=logger, log_errors_only=log_errors_only)

        # Pass logger to child components
        self._data_processor = OKXDataProcessor(
            symbol,
            logger=self.logger  # Pass parent's logger
        )
        self._data_validator = DataValidator(logger=self.logger)
        self._data_transformer = DataTransformer(logger=self.logger)
```

#### Supported Components

The following components support conditional logging:

- Parameters: `logger=None`
- Data processing with conditional logging

## Basic Usage

### Import and Initialize

```python
class BotManager:
    ...
        self.logger.info(f"Bot {bot_id} stopped")
```

## Log Format

All log messages follow this unified format:

```
[YYYY-MM-DD HH:MM:SS - LEVEL - message]
```

Example:

```
[2024-01-15 14:30:25 - INFO - Bot started successfully]
[2024-01-15 14:30:26 - ERROR - Connection failed: timeout]
```
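
For reference, a standard-library formatter producing this format looks like the following sketch. The real formatter is created inside `utils.logger.get_logger`, so this is illustrative only:

```python
import logging

formatter = logging.Formatter(
    fmt="[%(asctime)s - %(levelname)s - %(message)s]",
    datefmt="%Y-%m-%d %H:%M:%S",
)
handler = logging.StreamHandler()
handler.setFormatter(formatter)

demo = logging.getLogger("format_demo")
demo.addHandler(handler)
demo.setLevel(logging.INFO)
demo.info("Bot started successfully")  # -> [2024-01-15 14:30:25 - INFO - Bot started successfully]
```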

## File Organization

Logs are organized in a hierarchical structure:

```
logs/
├── tcp_dashboard/
│   ├── 2024-01-15.txt
│   └── 2024-01-16.txt
├── production_manager/
│   ├── 2024-01-15.txt
│   └── 2024-01-16.txt
├── collector_manager/
│   └── 2024-01-15.txt
├── okx_collector_btc_usdt/
│   └── 2024-01-15.txt
└── okx_collector_eth_usdt/
    └── 2024-01-15.txt
```

## Configuration

### Logger Parameters

- Deletes older files automatically
- Based on file modification time, not filename
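
A minimal sketch of what mtime-based retention looks like (a hypothetical helper; the actual cleanup runs inside the logger itself):

```python
from pathlib import Path

def clean_old_logs(log_dir: str, max_log_files: int = 30) -> None:
    """Keep only the `max_log_files` most recently modified log files."""
    files = sorted(
        Path(log_dir).glob("*.txt"),
        key=lambda p: p.stat().st_mtime,  # modification time, not filename
        reverse=True,
    )
    for old_file in files[max_log_files:]:
        old_file.unlink()  # delete everything beyond the newest N files
```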

## Best Practices for Conditional Logging

### 1. Logger Inheritance

```python
# Parent component creates logger
parent_logger = get_logger('parent_system')
parent = ParentComponent(logger=parent_logger)

# Pass logger to children for consistent hierarchy
child1 = ChildComponent(logger=parent_logger)
child2 = ChildComponent(logger=parent_logger, log_errors_only=True)
child3 = ChildComponent(logger=None)  # No logging
```

### 2. Environment-Based Configuration

```python
import os
from utils.logger import get_logger

def create_system_logger():
    """Create a logger based on the environment."""
    env = os.getenv('ENVIRONMENT', 'development')

    if env == 'production':
        return get_logger('production_system', log_level='INFO', verbose=False)
    elif env == 'testing':
        return None  # No logging during tests
    else:
        return get_logger('dev_system', log_level='DEBUG', verbose=True)

# Use in components
system_logger = create_system_logger()
manager = CollectorManager(logger=system_logger)
```

### 3. Conditional Error-Only Mode

```python
def create_collector_with_logging_strategy(symbol, strategy='normal'):
    """Create a collector with different logging strategies."""
    base_logger = get_logger(f'collector_{symbol.lower().replace("-", "_")}')

    if strategy == 'silent':
        return OKXCollector(symbol, logger=None)
    elif strategy == 'errors_only':
        return OKXCollector(symbol, logger=base_logger, log_errors_only=True)
    else:
        return OKXCollector(symbol, logger=base_logger)

# Usage
btc_collector = create_collector_with_logging_strategy('BTC-USDT', 'normal')
eth_collector = create_collector_with_logging_strategy('ETH-USDT', 'errors_only')
ada_collector = create_collector_with_logging_strategy('ADA-USDT', 'silent')
```

### 4. Performance Optimization

```python
class OptimizedComponent:
    def __init__(self, logger=None, log_errors_only=False):
        self.logger = logger
        self.log_errors_only = log_errors_only

        # Pre-compute logging capabilities for performance
        self.can_log_debug = logger and not log_errors_only
        self.can_log_info = logger and not log_errors_only
        self.can_log_warning = logger and not log_errors_only
        self.can_log_error = logger is not None
        self.can_log_critical = logger is not None

    def process_data(self, data):
        if self.can_log_debug:
            self.logger.debug(f"Processing {len(data)} records")

        # ... processing logic ...

        if self.can_log_info:
            self.logger.info("Data processing completed")
```

## Advanced Features

### Manual Log Cleanup

```python
if logger.isEnabledFor(logging.DEBUG):
    logger.debug(f"Data: {expensive_serialization(data)}")
```

## Migration Guide

### Updating Existing Components

1. **Add logger parameter to constructor**:
```python
def __init__(self, ..., logger=None, log_errors_only=False):
```

2. **Add conditional logging helpers**:
```python
def _log_debug(self, message: str) -> None:
    if self.logger and not self.log_errors_only:
        self.logger.debug(message)
```

3. **Update all logging calls**:
```python
# Before
self.logger.info("Message")

# After
self._log_info("Message")
```

4. **Pass logger to child components**:
```python
child = ChildComponent(logger=self.logger)
```

### From Standard Logging

```python
# Old logging (if any existed)
import logging

from utils.logger import get_logger
logger = get_logger('component_name', verbose=True)
```

### Gradual Adoption

1. **Phase 1**: Add optional logger parameters to new components
2. **Phase 2**: Update existing components to support conditional logging
3. **Phase 3**: Implement hierarchical logging structure
4. **Phase 4**: Add error-only logging mode

## Testing

### Testing Conditional Logging

#### Test Script Example

```python
# test_conditional_logging.py
from utils.logger import get_logger
from data.collector_manager import CollectorManager
from data.exchanges.okx.collector import OKXCollector

def test_no_logging():
    """Test that components work without loggers."""
    manager = CollectorManager(logger=None)
    collector = OKXCollector("BTC-USDT", logger=None)
    print("✓ No logging test passed")

def test_with_logging():
    """Test that components work with loggers."""
    logger = get_logger('test_system')
    manager = CollectorManager(logger=logger)
    collector = OKXCollector("BTC-USDT", logger=logger)
    print("✓ With logging test passed")

def test_error_only():
    """Test error-only logging mode."""
    logger = get_logger('test_errors')
    collector = OKXCollector("BTC-USDT", logger=logger, log_errors_only=True)
    print("✓ Error-only logging test passed")

if __name__ == "__main__":
    test_no_logging()
    test_with_logging()
    test_error_only()
    print("✅ All conditional logging tests passed!")
```

### Testing Changes

```python
# Test without logger
component = MyComponent(logger=None)
# Should work without errors, no logging

# Test with logger
logger = get_logger('test_component')
component = MyComponent(logger=logger)
# Should log normally

# Test error-only mode
component = MyComponent(logger=logger, log_errors_only=True)
# Should only log errors
```

### Basic System Test

Run a simple test to verify the logging system:

```bash
python -c "from utils.logger import get_logger; logger = get_logger('test', verbose=True); logger.info('Test message'); print('Check logs/test/ directory')"
```

## Troubleshooting

### Common Issues

1. **Permission errors**: Ensure the application has write permissions to the project directory
2. **Disk space**: Monitor disk usage and adjust log retention with `max_log_files`
3. **Threading issues**: The logger is thread-safe, but check for application-level concurrency issues
4. **Too many console messages**: Adjust the `verbose` parameter or log levels

### Debug Mode

Enable debug logging to troubleshoot issues:

```python
logger = get_logger('component_name', 'DEBUG', verbose=True)
```

### Console Output Issues

```python
# Force console output regardless of environment
logger = get_logger('component_name', verbose=True)

# Check environment variables
import os
print(f"VERBOSE_LOGGING: {os.getenv('VERBOSE_LOGGING')}")
print(f"LOG_TO_CONSOLE: {os.getenv('LOG_TO_CONSOLE')}")
```

### Fallback Logging

If file logging fails, the system automatically falls back to console logging with a warning message.
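
The shape of that fallback, sketched with standard-library pieces (an assumed implementation, not the actual `utils.logger` source):

```python
import logging
import sys

def make_handler(log_path: str) -> logging.Handler:
    """Return a file handler, or a console handler if the file can't be opened."""
    try:
        return logging.FileHandler(log_path)
    except OSError as exc:  # e.g. permission denied, missing directory
        print(f"Warning: file logging unavailable ({exc}); using console", file=sys.stderr)
        return logging.StreamHandler(sys.stderr)
```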

## Integration with Existing Code

The logging system is designed to be adopted gradually:

1. **Start with new modules**: Use the unified logger in new code
2. **Replace existing logging**: Gradually migrate existing logging to the unified system
3. **No breaking changes**: Existing code continues to work

## Maintenance

### Automatic Cleanup Benefits

```bash
# Find oversized log files, then count log files per component
find logs/ -name "*.txt" -size +10M
find logs/ -name "*.txt" | cut -d'/' -f2 | sort | uniq -c
```

This conditional logging system provides maximum flexibility while maintaining clean, maintainable code that works in all scenarios.

# Data Collection Service

The Data Collection Service is a production-ready service for cryptocurrency market data collection with clean logging and robust error handling. It provides a service layer that manages multiple data collectors for different trading pairs and exchanges.

## Overview

The service provides a high-level interface for managing the data collection system, handling configuration, lifecycle management, and monitoring. It acts as an orchestration layer on top of the core data collector components.

## Features

- **Service Lifecycle Management**: Start, stop, and monitor data collection operations
- **JSON Configuration**: File-based configuration with automatic defaults
- **Clean Production Logging**: Only essential operational information
- **Health Monitoring**: Service-level health checks and auto-recovery
- **Graceful Shutdown**: Proper signal handling and cleanup
- **Multi-Exchange Orchestration**: Coordinate collectors across multiple exchanges
- **Production Ready**: Designed for 24/7 operation with monitoring
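
The same lifecycle is available programmatically as well as through the CLI quick start below; a minimal sketch (module path and `run()` signature as documented for `DataCollectionService` elsewhere in this repo):

```python
import asyncio
from data.collection_service import DataCollectionService

async def main():
    service = DataCollectionService("config/data_collection.json")
    await service.run(duration_hours=8)  # duration_hours=None runs indefinitely

asyncio.run(main())
```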

## Quick Start

### Basic Usage

```bash
# Start with default configuration (indefinite run)
python scripts/start_data_collection.py

# Run for 8 hours
python scripts/start_data_collection.py --hours 8

# Use custom configuration
python scripts/start_data_collection.py --config config/my_config.json
```

### Monitoring

```bash
# Check status once
python scripts/monitor_clean.py

# Monitor continuously every 60 seconds
python scripts/monitor_clean.py --interval 60
```
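
For in-process monitoring, the service also exposes a status snapshot; a sketch using the fields from the service's documented status output:

```python
status = service.get_status()  # `service` from the lifecycle sketch above
print(f"Uptime: {status['uptime_hours']:.1f}h, "
      f"Collectors: {status['collectors_running']}, "
      f"Errors: {status['errors_count']}")
```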

## Configuration

The service uses JSON configuration files and automatically creates a default configuration if none exists.

### Default Configuration Location

`config/data_collection.json`

### Configuration Structure

```json
{
  "exchanges": {
    "okx": {
      "enabled": true,
      "trading_pairs": [
        {
          "symbol": "BTC-USDT",
          "enabled": true,
          "data_types": ["trade"],
          "timeframes": ["1m", "5m", "15m", "1h"]
        },
        {
          "symbol": "ETH-USDT",
          "enabled": true,
          "data_types": ["trade"],
          "timeframes": ["1m", "5m", "15m", "1h"]
        }
      ]
    }
  },
  "collection_settings": {
    "health_check_interval": 120,
    "store_raw_data": true,
    "auto_restart": true,
    "max_restart_attempts": 3
  },
  "logging": {
    "level": "INFO",
    "log_errors_only": true,
    "verbose_data_logging": false
  }
}
```

### Configuration Options

#### Exchange Settings

- **enabled**: Whether to enable this exchange
- **trading_pairs**: Array of trading pair configurations

#### Trading Pair Settings

- **symbol**: Trading pair symbol (e.g., "BTC-USDT")
- **enabled**: Whether to collect data for this pair
- **data_types**: Types of data to collect (["trade"], ["ticker"], etc.)
- **timeframes**: Candle timeframes to generate (["1m", "5m", "15m", "1h", "4h", "1d"])

#### Collection Settings

- **health_check_interval**: Health check frequency in seconds
- **store_raw_data**: Whether to store raw trade data
- **auto_restart**: Enable automatic restart on failures
- **max_restart_attempts**: Maximum restart attempts before giving up

#### Logging Settings

- **level**: Log level ("DEBUG", "INFO", "WARNING", "ERROR")
- **log_errors_only**: Only log errors and essential events
- **verbose_data_logging**: Enable verbose logging of individual trades/candles
|
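Conceptually, the load-or-default behavior looks like the sketch below. This is a minimal illustration using only the standard library; `DEFAULT_CONFIG` stands in for whatever defaults the service actually ships with.

```python
import json
from pathlib import Path

# Hypothetical defaults for illustration; the real service defines its own.
DEFAULT_CONFIG = {
    "exchanges": {"okx": {"enabled": True, "trading_pairs": []}},
    "collection_settings": {"health_check_interval": 120},
    "logging": {"level": "INFO", "log_errors_only": True},
}

def load_or_create(config_path: str = "config/data_collection.json") -> dict:
    """Load the JSON config, writing the defaults first if the file is missing."""
    path = Path(config_path)
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(json.dumps(DEFAULT_CONFIG, indent=2))
    return json.loads(path.read_text())
```
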
## Service Architecture

### Service Layer Components

```
┌─────────────────────────────────────────────────┐
│              DataCollectionService              │
│  ┌─────────────────────────────────────────┐    │
│  │         Configuration Manager           │    │
│  │  • JSON config loading/validation       │    │
│  │  • Default config generation            │    │
│  │  • Runtime config updates               │    │
│  └─────────────────────────────────────────┘    │
│  ┌─────────────────────────────────────────┐    │
│  │            Service Monitor              │    │
│  │  • Service-level health checks          │    │
│  │  • Uptime tracking                      │    │
│  │  • Error aggregation                    │    │
│  └─────────────────────────────────────────┘    │
│                      │                          │
│  ┌─────────────────────────────────────────┐    │
│  │           CollectorManager              │    │
│  │  • Individual collector management      │    │
│  │  • Health monitoring                    │    │
│  │  • Auto-restart coordination            │    │
│  └─────────────────────────────────────────┘    │
└─────────────────────────────────────────────────┘
                      │
        ┌─────────────────────────────┐
        │    Core Data Collectors     │
        │  (See data_collectors.md)   │
        └─────────────────────────────┘
```

### Data Flow

```
Configuration → Service → CollectorManager → Data Collectors → Database
                   ↓              ↓
            Service Monitor  Health Monitor
```

### Storage Integration

- **Raw Data**: PostgreSQL `raw_trades` table via repository pattern
- **Candles**: PostgreSQL `market_data` table with multiple timeframes
- **Real-time**: Redis pub/sub for live data distribution (see the subscriber sketch below)
- **Service Metrics**: Service uptime, error counts, collector statistics

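Consumers of the real-time feed subscribe to these channels. Here is a minimal subscriber sketch using redis-py; the channel name `market_data.BTC-USDT` is an assumed naming scheme, not a documented contract.

```python
import json
import redis

# Connection details normally come from the deployment environment
# (see "Environment Configuration" below); defaults suit local use.
r = redis.Redis(host="localhost", port=6379, decode_responses=True)

pubsub = r.pubsub()
pubsub.subscribe("market_data.BTC-USDT")  # assumed channel naming scheme

for message in pubsub.listen():
    if message["type"] != "message":
        continue  # skip subscribe confirmations
    tick = json.loads(message["data"])
    print(tick)
```
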
## Logging Philosophy

The service implements **clean production logging** focused on operational needs:

### What Gets Logged

✅ **Service Lifecycle**
- Service start/stop events
- Configuration loading
- Service initialization

✅ **Collector Orchestration**
- Collector creation and destruction
- Service-level health summaries
- Recovery operations

✅ **Configuration Events**
- Config file changes
- Runtime configuration updates
- Validation errors

✅ **Service Statistics**
- Periodic uptime reports
- Collection summary statistics
- Performance metrics

### What Doesn't Get Logged

❌ **Individual Data Points**
- Every trade received
- Every candle generated
- Raw market data

❌ **Internal Operations**
- Individual collector heartbeats
- Routine database operations
- Internal processing steps

## API Reference

### DataCollectionService

The main service class for managing data collection operations.

#### Constructor

```python
DataCollectionService(config_path: str = "config/data_collection.json")
```

**Parameters:**
- `config_path`: Path to JSON configuration file

#### Methods

##### `async run(duration_hours: Optional[float] = None) -> bool`

Run the service for a specified duration, or indefinitely if no duration is given.

**Parameters:**
- `duration_hours`: Optional duration in hours (None = indefinite)

**Returns:**
- `bool`: True if successful, False if an error occurred

**Example:**
```python
service = DataCollectionService()
await service.run(duration_hours=24)  # Run for 24 hours
```

##### `async start() -> bool`

Start the data collection service and all configured collectors.

**Returns:**
- `bool`: True if started successfully

##### `async stop() -> None`

Stop the service gracefully, shutting down all collectors and cleaning up resources.

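On Unix-like systems, graceful shutdown is typically wired to process signals. Below is a minimal sketch of hooking `stop()` into SIGINT/SIGTERM handling with asyncio; the bundled start script may do this differently.

```python
import asyncio
import signal

from data.collection_service import DataCollectionService

async def main():
    service = DataCollectionService()
    stop_requested = asyncio.Event()

    # Ask the event loop to set the flag when the process is signalled.
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, stop_requested.set)

    await service.start()
    await stop_requested.wait()  # block until SIGINT/SIGTERM arrives
    await service.stop()         # graceful shutdown: collectors + cleanup

asyncio.run(main())
```
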
##### `get_status() -> Dict[str, Any]`

Get current service status including uptime, collector counts, and errors.

**Returns:**
```python
{
    'service_running': True,
    'uptime_hours': 12.5,
    'collectors_total': 6,
    'collectors_running': 5,
    'collectors_failed': 1,
    'errors_count': 2,
    'last_error': 'Connection timeout for ETH-USDT',
    'configuration': {
        'config_file': 'config/data_collection.json',
        'exchanges_enabled': ['okx'],
        'total_trading_pairs': 6
    }
}
```

##### `async initialize_collectors() -> bool`

Initialize all collectors based on configuration.

**Returns:**
- `bool`: True if all collectors initialized successfully

##### `load_configuration() -> Dict[str, Any]`

Load and validate configuration from file.

**Returns:**
- `dict`: Loaded configuration

### Standalone Function

#### `run_data_collection_service(config_path, duration_hours)`

```python
async def run_data_collection_service(
    config_path: str = "config/data_collection.json",
    duration_hours: Optional[float] = None
) -> bool
```

Convenience function to run the service with minimal setup.

**Parameters:**
- `config_path`: Path to configuration file
- `duration_hours`: Optional duration in hours

**Returns:**
- `bool`: True if successful

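For example, a minimal entry point built on this function (illustrative usage based on the signature above, assuming it is exported from `data.collection_service` alongside the service class):

```python
import asyncio
from data.collection_service import run_data_collection_service

# Run with the default config for 8 hours; returns True on clean completion.
success = asyncio.run(run_data_collection_service(duration_hours=8))
```
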
## Integration Examples

### Basic Service Integration

```python
import asyncio
from data.collection_service import DataCollectionService

async def main():
    service = DataCollectionService("config/my_config.json")

    # Run for 24 hours
    success = await service.run(duration_hours=24)

    if not success:
        print("Service encountered errors")

if __name__ == "__main__":
    asyncio.run(main())
```

### Custom Status Monitoring

```python
import asyncio
from data.collection_service import DataCollectionService

async def monitor_service():
    service = DataCollectionService()

    # Start service in background
    start_task = asyncio.create_task(service.run())

    # Monitor status every 5 minutes
    while service.running:
        status = service.get_status()
        print(f"Service Uptime: {status['uptime_hours']:.1f}h")
        print(f"Collectors: {status['collectors_running']}/{status['collectors_total']}")
        print(f"Errors: {status['errors_count']}")

        await asyncio.sleep(300)  # 5 minutes

    await start_task

asyncio.run(monitor_service())
```

### Programmatic Control

```python
import asyncio
from data.collection_service import DataCollectionService

async def controlled_collection():
    service = DataCollectionService()

    try:
        # Initialize and start
        await service.initialize_collectors()
        await service.start()

        # Monitor and control
        while True:
            status = service.get_status()

            # Check if any collectors failed
            if status['collectors_failed'] > 0:
                print("Some collectors failed, checking health...")
                # Service auto-restart will handle this

            await asyncio.sleep(60)  # Check every minute

    except KeyboardInterrupt:
        print("Shutting down service...")
    finally:
        await service.stop()

asyncio.run(controlled_collection())
```

### Configuration Management

```python
import asyncio
import json
from data.collection_service import DataCollectionService

async def dynamic_configuration():
    service = DataCollectionService()

    # Load and modify configuration
    config = service.load_configuration()

    # Add new trading pair
    config['exchanges']['okx']['trading_pairs'].append({
        'symbol': 'SOL-USDT',
        'enabled': True,
        'data_types': ['trade'],
        'timeframes': ['1m', '5m']
    })

    # Save updated configuration
    with open('config/data_collection.json', 'w') as f:
        json.dump(config, f, indent=2)

    # Restart service with new config
    await service.stop()
    await service.start()

asyncio.run(dynamic_configuration())
```

## Error Handling

The service implements robust error handling at the service orchestration level:

### Service Level Errors

- **Configuration Errors**: Invalid JSON, missing required fields
- **Initialization Errors**: Failed collector creation, database connectivity
- **Runtime Errors**: Service-level exceptions, resource exhaustion

### Error Recovery Strategies

1. **Graceful Degradation**: Continue with healthy collectors
2. **Configuration Validation**: Validate before applying changes
3. **Service Restart**: Full service restart on critical errors (see the restart sketch after this list)
4. **Error Aggregation**: Collect and report errors across all collectors

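The restart strategy amounts to a bounded retry loop. A minimal sketch that honors a `max_restart_attempts` budget; the linear backoff here is illustrative, not the service's actual policy:

```python
import asyncio
from data.collection_service import DataCollectionService

async def run_with_restarts(max_restart_attempts: int = 3) -> None:
    for attempt in range(1, max_restart_attempts + 1):
        service = DataCollectionService()  # fresh instance per attempt
        if await service.run():
            return  # clean exit, nothing to recover
        print(f"Service failed (attempt {attempt}/{max_restart_attempts})")
        await asyncio.sleep(10 * attempt)  # illustrative linear backoff
    print("Giving up after repeated failures")

asyncio.run(run_with_restarts())
```
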
### Error Reporting

```python
# Service status includes error information
status = service.get_status()

if status['errors_count'] > 0:
    print(f"Service has {status['errors_count']} errors")
    print(f"Last error: {status['last_error']}")

# Get detailed error information from collectors
for collector_name in service.manager.list_collectors():
    collector_status = service.manager.get_collector_status(collector_name)
    if collector_status['status'] == 'error':
        print(f"Collector {collector_name}: {collector_status['statistics']['last_error']}")
```

## Testing

### Running Service Tests

```bash
# Run all data collection service tests
uv run pytest tests/test_data_collection_service.py -v

# Run specific test categories
uv run pytest tests/test_data_collection_service.py::TestDataCollectionService -v

# Run with coverage
uv run pytest tests/test_data_collection_service.py --cov=data.collection_service
```

### Test Coverage

The service test suite covers:
- Service initialization and configuration loading
- Collector orchestration and management
- Service lifecycle (start/stop/restart)
- Configuration validation and error handling
- Signal handling and graceful shutdown
- Status reporting and monitoring
- Error aggregation and recovery

### Mock Testing

```python
import pytest
from unittest.mock import AsyncMock, patch
from data.collection_service import DataCollectionService

@pytest.mark.asyncio
async def test_service_with_mock_collectors():
    with patch('data.collection_service.CollectorManager') as mock_manager:
        # Mock successful initialization
        mock_manager.return_value.start.return_value = True

        service = DataCollectionService()
        result = await service.start()

        assert result is True
        mock_manager.return_value.start.assert_called_once()
```

## Production Deployment

### Docker Deployment

```dockerfile
FROM python:3.11-slim

WORKDIR /app
COPY . .

# Install dependencies
RUN pip install uv
RUN uv pip install -r requirements.txt

# Create logs and config directories
RUN mkdir -p logs config

# Copy production configuration
COPY config/production.json config/data_collection.json

# Health check
HEALTHCHECK --interval=60s --timeout=10s --start-period=30s --retries=3 \
    CMD python scripts/health_check.py || exit 1

# Run service
CMD ["python", "scripts/start_data_collection.py", "--config", "config/data_collection.json"]
```

### Kubernetes Deployment

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: data-collection-service
spec:
  replicas: 1
  selector:
    matchLabels:
      app: data-collection-service
  template:
    metadata:
      labels:
        app: data-collection-service
    spec:
      containers:
      - name: data-collector
        image: crypto-dashboard/data-collector:latest
        ports:
        - containerPort: 8080
        env:
        - name: POSTGRES_HOST
          value: "postgres-service"
        - name: REDIS_HOST
          value: "redis-service"
        volumeMounts:
        - name: config-volume
          mountPath: /app/config
        - name: logs-volume
          mountPath: /app/logs
        livenessProbe:
          exec:
            command:
            - python
            - scripts/health_check.py
          initialDelaySeconds: 30
          periodSeconds: 60
      volumes:
      - name: config-volume
        configMap:
          name: data-collection-config
      - name: logs-volume
        emptyDir: {}
```

### Systemd Service

```ini
[Unit]
Description=Cryptocurrency Data Collection Service
After=network.target postgres.service redis.service
Requires=postgres.service redis.service

[Service]
Type=simple
User=crypto-collector
Group=crypto-collector
WorkingDirectory=/opt/crypto-dashboard
ExecStart=/usr/bin/python scripts/start_data_collection.py --config config/production.json
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
RestartSec=10
KillMode=mixed
TimeoutStopSec=30

# Environment
Environment=PYTHONPATH=/opt/crypto-dashboard
Environment=LOG_LEVEL=INFO

# Security
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ReadWritePaths=/opt/crypto-dashboard/logs

[Install]
WantedBy=multi-user.target
```

### Environment Configuration

```bash
# Production environment variables
export ENVIRONMENT=production
export POSTGRES_HOST=postgres.internal
export POSTGRES_PORT=5432
export POSTGRES_DB=crypto_dashboard
export POSTGRES_USER=dashboard_user
export POSTGRES_PASSWORD=secure_password
export REDIS_HOST=redis.internal
export REDIS_PORT=6379

# Service configuration
export DATA_COLLECTION_CONFIG=/etc/crypto-dashboard/data_collection.json
export LOG_LEVEL=INFO
export HEALTH_CHECK_INTERVAL=120
```

## Monitoring and Alerting

### Metrics Collection

The service exposes metrics for monitoring systems:

```python
# Service metrics
service_uptime_hours = 24.5
collectors_running = 5
collectors_total = 6
errors_per_hour = 0.2
data_points_processed = 15000
```

### Health Checks

```python
# External health check endpoint
async def health_check():
    service = DataCollectionService()
    status = service.get_status()

    if not status['service_running']:
        return {'status': 'unhealthy', 'reason': 'service_stopped'}

    if status['collectors_failed'] > status['collectors_total'] * 0.5:
        return {'status': 'degraded', 'reason': 'too_many_failed_collectors'}

    return {'status': 'healthy'}
```

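The Docker `HEALTHCHECK` above expects `scripts/health_check.py` to report health through its exit code. One plausible shape for that script, sketched here rather than taken from the repository:

```python
import sys

from data.collection_service import DataCollectionService

def main() -> int:
    service = DataCollectionService()
    status = service.get_status()
    # A non-zero exit code marks the container unhealthy.
    return 0 if status['service_running'] else 1

if __name__ == "__main__":
    sys.exit(main())
```
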
### Alerting Rules

```yaml
# Prometheus alerting rules
groups:
  - name: data_collection_service
    rules:
      - alert: DataCollectionServiceDown
        expr: up{job="data-collection-service"} == 0
        for: 5m
        annotations:
          summary: "Data collection service is down"

      - alert: TooManyFailedCollectors
        expr: collectors_failed / collectors_total > 0.5
        for: 10m
        annotations:
          summary: "More than 50% of collectors have failed"

      - alert: HighErrorRate
        expr: rate(errors_total[5m]) > 0.1
        for: 15m
        annotations:
          summary: "High error rate in data collection service"
```

## Performance Considerations

### Resource Usage

- **Memory**: ~150MB base + ~15MB per trading pair (including service overhead)
- **CPU**: Low (async I/O bound, service orchestration)
- **Network**: ~1KB/s per trading pair
- **Storage**: Service logs ~10MB/day

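As a worked example of the memory figures above, a single instance collecting six trading pairs should sit near 150 + 6 × 15 = 240MB.
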
### Scaling Strategies

1. **Horizontal Scaling**: Multiple service instances with different configurations
2. **Configuration Partitioning**: Separate services by exchange or asset class (see the example below)
3. **Load Balancing**: Distribute trading pairs across service instances
4. **Regional Deployment**: Deploy closer to exchange data centers

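As an example of configuration partitioning, one instance could own the major pairs and another the long tail; the per-partition config filenames here are hypothetical:

```bash
# One instance per partition, each with its own config file
python scripts/start_data_collection.py --config config/okx_majors.json &
python scripts/start_data_collection.py --config config/okx_alts.json &
```
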
### Optimization Tips

1. **Configuration Tuning**: Optimize health check intervals and timeframes
2. **Resource Limits**: Set appropriate memory and CPU limits
3. **Batch Operations**: Use efficient database operations
4. **Monitoring Overhead**: Balance monitoring frequency with performance

## Troubleshooting

### Common Service Issues

#### Service Won't Start

```
❌ Failed to start data collection service
```

**Solutions:**
1. Check configuration file validity
2. Verify database connectivity
3. Ensure no port conflicts
4. Check file permissions

#### Configuration Loading Failed

```
❌ Failed to load config from config/data_collection.json: Invalid JSON
```

**Solutions:**
1. Validate JSON syntax (see the command below)
2. Check required fields
3. Verify file encoding (UTF-8)
4. Recreate default configuration

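Python's standard library can do the syntax check; this prints the parsed document on success and a parse error with line and column on failure:

```bash
python -m json.tool config/data_collection.json
```
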
#### No Collectors Created

```
❌ No collectors were successfully initialized
```

**Solutions:**
1. Check exchange configuration
2. Verify trading pair symbols
3. Check network connectivity
4. Review collector creation logs

### Debug Mode

Enable verbose service debugging:

```json
{
  "logging": {
    "level": "DEBUG",
    "log_errors_only": false,
    "verbose_data_logging": true
  }
}
```

### Service Diagnostics

```python
# Run diagnostic check
from data.collection_service import DataCollectionService

service = DataCollectionService()
status = service.get_status()

print(f"Service Running: {status['service_running']}")
print(f"Configuration File: {status['configuration']['config_file']}")
print(f"Collectors: {status['collectors_running']}/{status['collectors_total']}")

# Check individual collector health
for collector_name in service.manager.list_collectors():
    collector_status = service.manager.get_collector_status(collector_name)
    print(f"{collector_name}: {collector_status['status']}")
```

## Related Documentation

- [Data Collectors System](../components/data_collectors.md) - Core collector components
- [Logging System](../components/logging.md) - Logging configuration
- [Database Operations](../database/operations.md) - Database integration
- [Monitoring Guide](../monitoring/README.md) - System monitoring setup