Enhance error handling and security measures in data collection services

- Implemented `_sanitize_error` method in `DataCollectionService` and `CollectorManager` to prevent leaking internal error details.
- Improved error handling across various methods by catching specific exceptions and logging sanitized messages with `exc_info=True`.
- Added file permission validation in `ServiceConfig` to ensure secure configuration file handling, including detailed logging for permission issues.
- Refactored logging practices to enhance clarity and maintainability, ensuring consistent error reporting.

These changes significantly bolster the security and robustness of the data collection services, aligning with project standards for error handling and maintainability.
This commit is contained in:
Vasily.onl 2025-06-10 13:12:13 +08:00
parent 6ab40414e7
commit c28e4a9aaf
5 changed files with 312 additions and 30 deletions

View File

@ -73,9 +73,25 @@ class ServiceConfig:
return validated_config
except Exception as e:
except (FileNotFoundError, IsADirectoryError, PermissionError) as e:
# Handle file system related errors
if self.logger:
self.logger.error(f"❌ Failed to load configuration: {e}", exc_info=True)
self.logger.error(f"❌ File system error loading configuration: {e}", exc_info=True)
raise
except (json.JSONDecodeError, UnicodeDecodeError) as e:
# Handle file parsing errors
if self.logger:
self.logger.error(f"❌ Configuration file parsing error: {e}", exc_info=True)
raise ValueError(f"Invalid configuration file format: {e}")
except (KeyError, ValueError, TypeError) as e:
# Handle configuration validation errors
if self.logger:
self.logger.error(f"❌ Configuration validation error: {e}", exc_info=True)
raise
except Exception as e:
# Catch any other unexpected errors
if self.logger:
self.logger.error(f"❌ Unexpected error loading configuration: {e}", exc_info=True)
raise
def _validate_file_permissions(self, config_file: Path) -> None:
@ -87,24 +103,84 @@ class ServiceConfig:
Raises:
PermissionError: If file permissions are too permissive
ValueError: If file validation fails
"""
try:
file_stat = config_file.stat()
file_mode = file_stat.st_mode
# List of security violations
violations = []
# Check if file is readable by others (security risk)
if file_mode & stat.S_IROTH:
violations.append("readable by others")
if self.logger:
self.logger.warning(f"⚠️ Configuration file {config_file} is readable by others")
# Check if file is writable by others (security risk)
if file_mode & stat.S_IWOTH:
violations.append("writable by others")
if self.logger:
self.logger.warning(f"⚠️ Configuration file {config_file} is writable by others")
# Check if file is writable by group (security risk)
if file_mode & stat.S_IWGRP:
violations.append("writable by group")
if self.logger:
self.logger.warning(f"⚠️ Configuration file {config_file} is writable by group")
# Check if file is readable by group (potential security risk)
if file_mode & stat.S_IRGRP:
violations.append("readable by group")
if self.logger:
self.logger.warning(f"⚠️ Configuration file {config_file} is readable by group")
# Check if file is executable (unnecessary for config files)
if file_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH):
violations.append("executable")
if self.logger:
self.logger.warning(f"⚠️ Configuration file {config_file} has execute permissions")
# Enforce security policy - raise exception if critical violations found
critical_violations = [v for v in violations if "writable by" in v or "readable by others" in v]
if critical_violations:
error_msg = f"Configuration file {config_file} has insecure permissions: {', '.join(critical_violations)}. " \
f"File should only be readable/writable by owner (mode 600 recommended)."
if self.logger:
self.logger.error(f"🔒 Security violation: {error_msg}", exc_info=True)
# On Windows, permission checks might not work as expected, so we log but don't fail
if os.name == 'nt': # Windows
if self.logger:
self.logger.warning("⚠️ On Windows, file permission validation is limited. Please ensure config files are properly secured.")
else:
# On Unix-like systems, enforce strict permissions
raise PermissionError(error_msg)
# Validate file ownership (only on Unix-like systems)
if os.name != 'nt':
current_uid = os.getuid() if hasattr(os, 'getuid') else None
file_uid = file_stat.st_uid
if current_uid is not None and file_uid != current_uid:
warning_msg = f"Configuration file {config_file} is owned by different user (UID: {file_uid}, current: {current_uid})"
if self.logger:
self.logger.warning(f"⚠️ {warning_msg}")
# Log successful validation
if not violations and self.logger:
self.logger.debug(f"✅ Configuration file {config_file} has secure permissions")
except PermissionError:
# Re-raise permission errors
raise
except Exception as e:
error_msg = f"Could not validate file permissions for {config_file}: {e}"
if self.logger:
self.logger.warning(f"⚠️ Could not validate file permissions: {e}")
self.logger.error(f"{error_msg}", exc_info=True)
raise ValueError(error_msg)
def _validate_config_schema(self, config: Dict[str, Any]) -> Dict[str, Any]:
"""
@ -194,15 +270,78 @@ class ServiceConfig:
json.dump(default_config, f, indent=2)
# Set secure file permissions (owner read/write only)
try:
os.chmod(config_file, stat.S_IRUSR | stat.S_IWUSR)
except Exception as e:
if self.logger:
self.logger.warning(f"⚠️ Could not set secure file permissions: {e}")
self._set_secure_permissions(config_file)
if self.logger:
self.logger.info(f"📄 Created default configuration: {config_file}")
def _set_secure_permissions(self, config_file: Path) -> None:
"""
Set secure file permissions for configuration file.
Args:
config_file: Path to the configuration file
"""
try:
# Set permissions to owner read/write only (mode 600)
secure_mode = stat.S_IRUSR | stat.S_IWUSR
os.chmod(config_file, secure_mode)
if self.logger:
self.logger.debug(f"Set secure permissions (600) for {config_file}")
except (OSError, PermissionError) as e:
# Handle file system permission errors
if self.logger:
self.logger.warning(f"Could not set secure file permissions for {config_file}: {e}")
except Exception as e:
# Handle any other unexpected errors
if self.logger:
self.logger.warning(f"Unexpected error setting file permissions for {config_file}: {e}")
def fix_file_permissions(self, config_file_path: Optional[str] = None) -> bool:
"""
Fix file permissions for configuration file to be secure.
Args:
config_file_path: Optional specific file path, defaults to instance config_path
Returns:
bool: True if permissions were fixed successfully, False otherwise
"""
try:
file_path = Path(config_file_path or self.config_path)
if not file_path.exists():
if self.logger:
self.logger.error(f"Configuration file {file_path} does not exist", exc_info=True)
return False
# Set secure permissions
self._set_secure_permissions(file_path)
# Verify the fix worked
try:
self._validate_file_permissions(file_path)
if self.logger:
self.logger.info(f"✅ Successfully fixed permissions for {file_path}")
return True
except (PermissionError, ValueError):
if self.logger:
self.logger.warning(f"⚠️ Permissions may still be insecure for {file_path}")
return False
except (FileNotFoundError, PermissionError, OSError) as e:
# Handle file system related errors
if self.logger:
self.logger.error(f"❌ File system error fixing permissions: {e}", exc_info=True)
return False
except Exception as e:
# Handle any other unexpected errors
if self.logger:
self.logger.error(f"❌ Unexpected error fixing file permissions: {e}", exc_info=True)
return False
def _get_default_connection_config(self) -> Dict[str, Any]:
"""Get default connection configuration."""
return {
@ -318,13 +457,18 @@ class ServiceConfig:
self.logger.info("Configuration updated in memory")
def save_config(self) -> None:
"""Save current configuration to file."""
"""Save current configuration to file with secure permissions."""
if self._config is None:
raise RuntimeError("Configuration has not been loaded. Call load_config() first.")
config_file = Path(self.config_path)
# Save configuration
with open(config_file, 'w') as f:
json.dump(self._config, f, indent=2)
# Ensure secure permissions after saving
self._set_secure_permissions(config_file)
if self.logger:
self.logger.info(f"Configuration saved to {self.config_path}")
self.logger.info(f"Configuration saved to {self.config_path} with secure permissions")

View File

@ -71,13 +71,57 @@ class DataCollectionService:
self.logger.info("🚀 Data Collection Service initialized")
self.logger.info(f"📁 Configuration: {config_path}")
def _sanitize_error(self, message: str) -> str:
"""
Sanitize error message to prevent leaking internal details.
Args:
message: Original error message
Returns:
Sanitized error message
"""
# Remove sensitive patterns that might leak internal information
sensitive_patterns = [
'password=',
'token=',
'key=',
'secret=',
'auth=',
'api_key=',
'api_secret=',
'access_token=',
'refresh_token='
]
sanitized = message
for pattern in sensitive_patterns:
if pattern.lower() in sanitized.lower():
# Replace the value part after = with [REDACTED]
parts = sanitized.split(pattern)
if len(parts) > 1:
# Find the end of the value (space, comma, or end of string)
value_part = parts[1]
end_chars = [' ', ',', ')', ']', '}', '\n', '\t']
end_idx = len(value_part)
for char in end_chars:
char_idx = value_part.find(char)
if char_idx != -1 and char_idx < end_idx:
end_idx = char_idx
# Replace the value with [REDACTED]
sanitized = parts[0] + pattern + '[REDACTED]' + value_part[end_idx:]
return sanitized
async def initialize_collectors(self) -> bool:
"""Initialize all data collectors based on configuration."""
try:
collectors = await self.collector_factory.create_collectors_from_config(self.config)
if not collectors:
self.logger.error("❌ No collectors were successfully created")
self.logger.error("❌ No collectors were successfully created", exc_info=True)
return False
for collector in collectors:
@ -88,8 +132,22 @@ class DataCollectionService:
self.logger.info(f"✅ Successfully initialized {len(collectors)} data collectors")
return True
except (KeyError, AttributeError, TypeError) as e:
# Handle configuration and data structure errors
sanitized_message = self._sanitize_error(f"❌ Configuration error initializing collectors: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
return False
except (ConnectionError, OSError, IOError) as e:
# Handle connection and I/O related errors
sanitized_message = self._sanitize_error(f"❌ Connection/IO error initializing collectors: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
return False
except Exception as e:
self.logger.error(f"❌ Failed to initialize collectors: {e}", exc_info=True)
# Catch any other unexpected errors
sanitized_message = self._sanitize_error(f"❌ Unexpected error initializing collectors: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
return False
@ -117,12 +175,26 @@ class DataCollectionService:
self.logger.info(f"📈 Active collectors: {self.stats['collectors_running']}")
return True
else:
self.logger.error("Failed to start data collectors")
self.logger.error("Failed to start data collectors", exc_info=True)
self.stats['errors_count'] += 1
return False
except (ConnectionError, OSError, IOError) as e:
# Handle database and connection errors
sanitized_message = self._sanitize_error(f"Database/Connection error starting service: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
return False
except (AttributeError, TypeError, ValueError) as e:
# Handle configuration and data validation errors
sanitized_message = self._sanitize_error(f"❌ Configuration error starting service: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
return False
except Exception as e:
self.logger.error(f"❌ Failed to start service: {e}", exc_info=True)
# Catch any other unexpected errors
sanitized_message = self._sanitize_error(f"❌ Unexpected error starting service: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
return False
@ -144,8 +216,19 @@ class DataCollectionService:
self.logger.info("✅ Data Collection Service stopped gracefully")
self.logger.info(f"📊 Total uptime: {self.stats['total_uptime_seconds']:.1f} seconds")
except (asyncio.CancelledError, KeyboardInterrupt):
# Handle graceful shutdown scenarios
self.logger.warning("Service shutdown was interrupted")
self.stats['errors_count'] += 1
except (ConnectionError, OSError, IOError) as e:
# Handle connection and I/O related errors during shutdown
sanitized_message = self._sanitize_error(f"Connection/IO error during service shutdown: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
except Exception as e:
self.logger.error(f"❌ Error during service shutdown: {e}", exc_info=True)
# Catch any other unexpected errors during shutdown
sanitized_message = self._sanitize_error(f"Unexpected error during service shutdown: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
def get_status(self) -> Dict[str, Any]:
@ -234,8 +317,20 @@ class DataCollectionService:
return True
except (asyncio.CancelledError, KeyboardInterrupt):
# Handle graceful shutdown scenarios
self.logger.info("Service run was cancelled gracefully")
return True
except (asyncio.TimeoutError, ConnectionError, OSError, IOError) as e:
# Handle timeout, connection and I/O related errors
sanitized_message = self._sanitize_error(f"Connection/Timeout error during service run: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
return False
except Exception as e:
self.logger.error(f"❌ Service error: {e}", exc_info=True)
# Catch any other unexpected errors
sanitized_message = self._sanitize_error(f"Unexpected service error: {e}")
self.logger.error(sanitized_message, exc_info=True)
self.stats['errors_count'] += 1
return False
finally:

View File

@ -71,7 +71,7 @@ class CollectorFactory:
return collector
else:
if self.logger:
self.logger.error(f"Failed to create collector for {symbol}")
self.logger.error(f"Failed to create collector for {symbol}", exc_info=True)
return None
except Exception as e:
@ -94,13 +94,13 @@ class CollectorFactory:
# Validate required fields
if 'symbol' not in pair_config:
if self.logger:
self.logger.error("Trading pair missing required 'symbol' field")
self.logger.error("Trading pair missing required 'symbol' field", exc_info=True)
return None
symbol = pair_config['symbol']
if not isinstance(symbol, str) or '-' not in symbol:
if self.logger:
self.logger.error(f"Invalid symbol format: {symbol}. Expected format: 'BASE-QUOTE'")
self.logger.error(f"Invalid symbol format: {symbol}. Expected format: 'BASE-QUOTE'", exc_info=True)
return None
# Apply defaults and validate data types
@ -239,7 +239,7 @@ class CollectorFactory:
else:
symbol = pair_config.get('symbol', 'unknown')
if self.logger:
self.logger.error(f"Failed to create collector for {symbol}")
self.logger.error(f"Failed to create collector for {symbol}", exc_info=True)
if self.logger:
self.logger.info(f"Successfully created {len(collectors)} collectors")

View File

@ -56,6 +56,19 @@ class CollectorManager:
if self.logger_manager.is_debug_enabled():
self.logger_manager.log_info(f"Initialized collector manager: {manager_name}")
def _sanitize_error(self, message: str) -> str:
"""
Sanitize error message to prevent leaking internal details.
Args:
message: Original error message
Returns:
Sanitized error message
"""
# Delegate to the logger manager's sanitization method
return self.logger_manager._sanitize_error(message)
def add_collector(self, collector: BaseDataCollector, config: Optional[CollectorConfig] = None) -> None:
"""Add a collector to be managed."""
self.lifecycle_manager.add_collector(collector, config)
@ -107,9 +120,28 @@ class CollectorManager:
self.logger_manager.log_info(f"Collector manager started - Managing {enabled_count} collectors")
return True
except Exception as e:
except (asyncio.CancelledError, KeyboardInterrupt):
# Handle graceful shutdown scenarios
self.status = ManagerStatus.ERROR
self.logger_manager.log_error(f"Failed to start collector manager: {e}", exc_info=True)
self.logger_manager.log_warning("Collector manager startup was cancelled")
return False
except (ConnectionError, OSError, IOError) as e:
# Handle connection and I/O related errors
self.status = ManagerStatus.ERROR
sanitized_message = self._sanitize_error(f"Connection/IO error starting collector manager: {e}")
self.logger_manager.log_error(sanitized_message, exc_info=True)
return False
except (AttributeError, TypeError, ValueError) as e:
# Handle configuration and data validation errors
self.status = ManagerStatus.ERROR
sanitized_message = self._sanitize_error(f"Configuration error starting collector manager: {e}")
self.logger_manager.log_error(sanitized_message, exc_info=True)
return False
except Exception as e:
# Catch any other unexpected errors
self.status = ManagerStatus.ERROR
sanitized_message = self._sanitize_error(f"Unexpected error starting collector manager: {e}")
self.logger_manager.log_error(sanitized_message, exc_info=True)
return False
async def stop(self) -> None:
@ -144,9 +176,20 @@ class CollectorManager:
self.status = ManagerStatus.STOPPED
self.logger_manager.log_info("Collector manager stopped")
except Exception as e:
except (asyncio.CancelledError, KeyboardInterrupt):
# Handle graceful shutdown scenarios
self.status = ManagerStatus.ERROR
self.logger_manager.log_error(f"Error stopping collector manager: {e}", exc_info=True)
self.logger_manager.log_warning("Collector manager shutdown was interrupted")
except (ConnectionError, OSError, IOError) as e:
# Handle connection and I/O related errors during shutdown
self.status = ManagerStatus.ERROR
sanitized_message = self._sanitize_error(f"Connection/IO error stopping collector manager: {e}")
self.logger_manager.log_error(sanitized_message, exc_info=True)
except Exception as e:
# Catch any other unexpected errors during shutdown
self.status = ManagerStatus.ERROR
sanitized_message = self._sanitize_error(f"Unexpected error stopping collector manager: {e}")
self.logger_manager.log_error(sanitized_message, exc_info=True)
async def restart_collector(self, collector_name: str) -> bool:
"""Restart a specific collector."""

View File

@ -105,12 +105,12 @@ Both files show good foundational architecture but exceed the recommended file s
- [x] 2.4 Refactor `run()` method to be under 50 lines by extracting sub-logics (e.g., `_run_main_loop`).
- [x] 2.5 Test './scripts/start_data_collection.py' and './scripts/production_clean.py' to ensure they work as expected.
- [ ] 3.0 Enhance Error Handling and Security
- [ ] 3.1 Implement a `_sanitize_error` method in `CollectorManager` and `DataCollectionService` to prevent leaking internal error details.
- [ ] 3.2 Add file permission validation for configuration files in `config/service_config.py`.
- [ ] 3.3 Review all `try-except` blocks to ensure specific exceptions are caught rather than broad `Exception`.
- [ ] 3.4 Ensure all logger calls include `exc_info=True` for error and critical logs.
- [ ] 3.5 Test './scripts/start_data_collection.py' and './scripts/production_clean.py' to ensure they work as expected.
- [x] 3.0 Enhance Error Handling and Security
- [x] 3.1 Implement a `_sanitize_error` method in `CollectorManager` and `DataCollectionService` to prevent leaking internal error details.
- [x] 3.2 Add file permission validation for configuration files in `config/service_config.py`.
- [x] 3.3 Review all `try-except` blocks to ensure specific exceptions are caught rather than broad `Exception`.
- [x] 3.4 Ensure all logger calls include `exc_info=True` for error and critical logs.
- [x] 3.5 Test './scripts/start_data_collection.py' and './scripts/production_clean.py' to ensure they work as expected.
- [ ] 4.0 Optimize Performance and Resource Management