Refactor data module to enhance modularity and maintainability

- Extracted `OHLCVData` and validation logic into a new `common/ohlcv_data.py` module, promoting better organization and reusability.
- Updated `BaseDataCollector` to utilize the new `validate_ohlcv_data` function for improved data validation, enhancing code clarity and maintainability.
- Refactored imports in `data/__init__.py` to reflect the new structure, ensuring consistent access to common data types and exceptions.
- Removed redundant data validation logic from `BaseDataCollector`, streamlining its responsibilities.
- Added unit tests for `OHLCVData` and validation functions to ensure correctness and reliability.

These changes improve the architecture of the data module, aligning with project standards for maintainability and performance.
This commit is contained in:
Vasily.onl
2025-06-10 12:04:58 +08:00
parent 3db8fb1c41
commit 33f2110f19
15 changed files with 511 additions and 1009 deletions

View File

@@ -6,9 +6,10 @@ processing and validating the data, and storing it in the database.
"""
from .base_collector import (
BaseDataCollector, DataCollectorError, DataValidationError,
CollectorStatus, OHLCVData
BaseDataCollector, DataCollectorError
)
from .collector.collector_state_telemetry import CollectorStatus
from .common.ohlcv_data import OHLCVData, DataValidationError
from .common.data_types import DataType, MarketDataPoint
from .collector_manager import CollectorManager, ManagerStatus, CollectorConfig

View File

@@ -18,43 +18,7 @@ from .collector.collector_state_telemetry import CollectorStatus, CollectorState
from .collector.collector_connection_manager import ConnectionManager
from .collector.collector_callback_dispatcher import CallbackDispatcher
from .common.data_types import DataType, MarketDataPoint
@dataclass
class OHLCVData:
"""OHLCV (Open, High, Low, Close, Volume) data structure."""
symbol: str
timeframe: str
timestamp: datetime
open: Decimal
high: Decimal
low: Decimal
close: Decimal
volume: Decimal
trades_count: Optional[int] = None
def __post_init__(self):
"""Validate OHLCV data after initialization."""
if not self.timestamp.tzinfo:
self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)
# Validate price data
if not all(isinstance(price, (Decimal, float, int)) for price in [self.open, self.high, self.low, self.close]):
raise DataValidationError("All OHLCV prices must be numeric")
if not isinstance(self.volume, (Decimal, float, int)):
raise DataValidationError("Volume must be numeric")
# Convert to Decimal for precision
self.open = Decimal(str(self.open))
self.high = Decimal(str(self.high))
self.low = Decimal(str(self.low))
self.close = Decimal(str(self.close))
self.volume = Decimal(str(self.volume))
# Validate price relationships
if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")
from .common.ohlcv_data import OHLCVData, DataValidationError, validate_ohlcv_data
class DataCollectorError(Exception):
@@ -62,11 +26,6 @@ class DataCollectorError(Exception):
pass
class DataValidationError(DataCollectorError):
"""Exception raised when data validation fails."""
pass
class ConnectionError(DataCollectorError):
"""Exception raised when connection to data source fails."""
pass
@@ -493,7 +452,17 @@ class BaseDataCollector(ABC):
Returns:
Dictionary containing status information
"""
return self._state_telemetry.get_status()
status = self._state_telemetry.get_status()
# Add BaseDataCollector specific information
status.update({
'symbols': list(self.symbols),
'data_types': [dt.value for dt in self.data_types],
'timeframes': self.timeframes,
'auto_restart': self.auto_restart
})
return status
def get_health_status(self) -> Dict[str, Any]:
"""
@@ -553,38 +522,7 @@ class BaseDataCollector(ABC):
Raises:
DataValidationError: If data validation fails
"""
required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
# Check required fields
for field in required_fields:
if field not in data:
raise DataValidationError(f"Missing required field: {field}")
try:
# Parse timestamp
timestamp = data['timestamp']
if isinstance(timestamp, (int, float)):
# Assume Unix timestamp in milliseconds
timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
elif isinstance(timestamp, str):
timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
elif not isinstance(timestamp, datetime):
raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")
return OHLCVData(
symbol=symbol,
timeframe=timeframe,
timestamp=timestamp,
open=Decimal(str(data['open'])),
high=Decimal(str(data['high'])),
low=Decimal(str(data['low'])),
close=Decimal(str(data['close'])),
volume=Decimal(str(data['volume'])),
trades_count=data.get('trades_count')
)
except (ValueError, TypeError, KeyError) as e:
raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
return validate_ohlcv_data(data, symbol, timeframe)
def __repr__(self) -> str:
"""String representation of the collector."""

105
data/common/ohlcv_data.py Normal file
View File

@@ -0,0 +1,105 @@
"""
OHLCV data structure and validation utilities.
This module provides standardized OHLCV (Open, High, Low, Close, Volume) data
structures and validation functions for financial market data.
"""
from dataclasses import dataclass
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, Any, Optional
class DataValidationError(Exception):
"""Exception raised when OHLCV data validation fails."""
pass
@dataclass
class OHLCVData:
"""OHLCV (Open, High, Low, Close, Volume) data structure."""
symbol: str
timeframe: str
timestamp: datetime
open: Decimal
high: Decimal
low: Decimal
close: Decimal
volume: Decimal
trades_count: Optional[int] = None
def __post_init__(self):
"""Validate OHLCV data after initialization."""
if not self.timestamp.tzinfo:
self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)
# Validate price data
if not all(isinstance(price, (Decimal, float, int)) for price in [self.open, self.high, self.low, self.close]):
raise DataValidationError("All OHLCV prices must be numeric")
if not isinstance(self.volume, (Decimal, float, int)):
raise DataValidationError("Volume must be numeric")
# Convert to Decimal for precision
self.open = Decimal(str(self.open))
self.high = Decimal(str(self.high))
self.low = Decimal(str(self.low))
self.close = Decimal(str(self.close))
self.volume = Decimal(str(self.volume))
# Validate price relationships
if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")
def validate_ohlcv_data(data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
"""
Validate and convert raw OHLCV data to standardized format.
Args:
data: Raw OHLCV data dictionary
symbol: Trading symbol
timeframe: Timeframe (e.g., '1m', '5m', '1h')
Returns:
Validated OHLCVData object
Raises:
DataValidationError: If data validation fails
"""
required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
# Check required fields
for field in required_fields:
if field not in data:
raise DataValidationError(f"Missing required field: {field}")
try:
# Parse timestamp
timestamp = data['timestamp']
if isinstance(timestamp, (int, float)):
# Assume Unix timestamp in milliseconds
timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
elif isinstance(timestamp, str):
timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
elif not isinstance(timestamp, datetime):
raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")
return OHLCVData(
symbol=symbol,
timeframe=timeframe,
timestamp=timestamp,
open=Decimal(str(data['open'])),
high=Decimal(str(data['high'])),
low=Decimal(str(data['low'])),
close=Decimal(str(data['close'])),
volume=Decimal(str(data['volume'])),
trades_count=data.get('trades_count')
)
except (ValueError, TypeError, KeyError) as e:
raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
except Exception as e:
# Catch any other exceptions (like Decimal InvalidOperation)
raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")