Refactor data module to enhance modularity and maintainability
- Extracted `OHLCVData` and validation logic into a new `common/ohlcv_data.py` module, promoting better organization and reusability. - Updated `BaseDataCollector` to utilize the new `validate_ohlcv_data` function for improved data validation, enhancing code clarity and maintainability. - Refactored imports in `data/__init__.py` to reflect the new structure, ensuring consistent access to common data types and exceptions. - Removed redundant data validation logic from `BaseDataCollector`, streamlining its responsibilities. - Added unit tests for `OHLCVData` and validation functions to ensure correctness and reliability. These changes improve the architecture of the data module, aligning with project standards for maintainability and performance.
This commit is contained in:
@@ -6,9 +6,10 @@ processing and validating the data, and storing it in the database.
|
||||
"""
|
||||
|
||||
from .base_collector import (
|
||||
BaseDataCollector, DataCollectorError, DataValidationError,
|
||||
CollectorStatus, OHLCVData
|
||||
BaseDataCollector, DataCollectorError
|
||||
)
|
||||
from .collector.collector_state_telemetry import CollectorStatus
|
||||
from .common.ohlcv_data import OHLCVData, DataValidationError
|
||||
from .common.data_types import DataType, MarketDataPoint
|
||||
from .collector_manager import CollectorManager, ManagerStatus, CollectorConfig
|
||||
|
||||
|
||||
@@ -18,43 +18,7 @@ from .collector.collector_state_telemetry import CollectorStatus, CollectorState
|
||||
from .collector.collector_connection_manager import ConnectionManager
|
||||
from .collector.collector_callback_dispatcher import CallbackDispatcher
|
||||
from .common.data_types import DataType, MarketDataPoint
|
||||
|
||||
|
||||
@dataclass
|
||||
class OHLCVData:
|
||||
"""OHLCV (Open, High, Low, Close, Volume) data structure."""
|
||||
symbol: str
|
||||
timeframe: str
|
||||
timestamp: datetime
|
||||
open: Decimal
|
||||
high: Decimal
|
||||
low: Decimal
|
||||
close: Decimal
|
||||
volume: Decimal
|
||||
trades_count: Optional[int] = None
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate OHLCV data after initialization."""
|
||||
if not self.timestamp.tzinfo:
|
||||
self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)
|
||||
|
||||
# Validate price data
|
||||
if not all(isinstance(price, (Decimal, float, int)) for price in [self.open, self.high, self.low, self.close]):
|
||||
raise DataValidationError("All OHLCV prices must be numeric")
|
||||
|
||||
if not isinstance(self.volume, (Decimal, float, int)):
|
||||
raise DataValidationError("Volume must be numeric")
|
||||
|
||||
# Convert to Decimal for precision
|
||||
self.open = Decimal(str(self.open))
|
||||
self.high = Decimal(str(self.high))
|
||||
self.low = Decimal(str(self.low))
|
||||
self.close = Decimal(str(self.close))
|
||||
self.volume = Decimal(str(self.volume))
|
||||
|
||||
# Validate price relationships
|
||||
if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
|
||||
raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")
|
||||
from .common.ohlcv_data import OHLCVData, DataValidationError, validate_ohlcv_data
|
||||
|
||||
|
||||
class DataCollectorError(Exception):
|
||||
@@ -62,11 +26,6 @@ class DataCollectorError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DataValidationError(DataCollectorError):
|
||||
"""Exception raised when data validation fails."""
|
||||
pass
|
||||
|
||||
|
||||
class ConnectionError(DataCollectorError):
|
||||
"""Exception raised when connection to data source fails."""
|
||||
pass
|
||||
@@ -493,7 +452,17 @@ class BaseDataCollector(ABC):
|
||||
Returns:
|
||||
Dictionary containing status information
|
||||
"""
|
||||
return self._state_telemetry.get_status()
|
||||
status = self._state_telemetry.get_status()
|
||||
|
||||
# Add BaseDataCollector specific information
|
||||
status.update({
|
||||
'symbols': list(self.symbols),
|
||||
'data_types': [dt.value for dt in self.data_types],
|
||||
'timeframes': self.timeframes,
|
||||
'auto_restart': self.auto_restart
|
||||
})
|
||||
|
||||
return status
|
||||
|
||||
def get_health_status(self) -> Dict[str, Any]:
|
||||
"""
|
||||
@@ -553,38 +522,7 @@ class BaseDataCollector(ABC):
|
||||
Raises:
|
||||
DataValidationError: If data validation fails
|
||||
"""
|
||||
required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
|
||||
|
||||
# Check required fields
|
||||
for field in required_fields:
|
||||
if field not in data:
|
||||
raise DataValidationError(f"Missing required field: {field}")
|
||||
|
||||
try:
|
||||
# Parse timestamp
|
||||
timestamp = data['timestamp']
|
||||
if isinstance(timestamp, (int, float)):
|
||||
# Assume Unix timestamp in milliseconds
|
||||
timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
|
||||
elif isinstance(timestamp, str):
|
||||
timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
|
||||
elif not isinstance(timestamp, datetime):
|
||||
raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")
|
||||
|
||||
return OHLCVData(
|
||||
symbol=symbol,
|
||||
timeframe=timeframe,
|
||||
timestamp=timestamp,
|
||||
open=Decimal(str(data['open'])),
|
||||
high=Decimal(str(data['high'])),
|
||||
low=Decimal(str(data['low'])),
|
||||
close=Decimal(str(data['close'])),
|
||||
volume=Decimal(str(data['volume'])),
|
||||
trades_count=data.get('trades_count')
|
||||
)
|
||||
|
||||
except (ValueError, TypeError, KeyError) as e:
|
||||
raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
|
||||
return validate_ohlcv_data(data, symbol, timeframe)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""String representation of the collector."""
|
||||
|
||||
105
data/common/ohlcv_data.py
Normal file
105
data/common/ohlcv_data.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
OHLCV data structure and validation utilities.
|
||||
|
||||
This module provides standardized OHLCV (Open, High, Low, Close, Volume) data
|
||||
structures and validation functions for financial market data.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from decimal import Decimal
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
|
||||
class DataValidationError(Exception):
|
||||
"""Exception raised when OHLCV data validation fails."""
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class OHLCVData:
|
||||
"""OHLCV (Open, High, Low, Close, Volume) data structure."""
|
||||
symbol: str
|
||||
timeframe: str
|
||||
timestamp: datetime
|
||||
open: Decimal
|
||||
high: Decimal
|
||||
low: Decimal
|
||||
close: Decimal
|
||||
volume: Decimal
|
||||
trades_count: Optional[int] = None
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate OHLCV data after initialization."""
|
||||
if not self.timestamp.tzinfo:
|
||||
self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)
|
||||
|
||||
# Validate price data
|
||||
if not all(isinstance(price, (Decimal, float, int)) for price in [self.open, self.high, self.low, self.close]):
|
||||
raise DataValidationError("All OHLCV prices must be numeric")
|
||||
|
||||
if not isinstance(self.volume, (Decimal, float, int)):
|
||||
raise DataValidationError("Volume must be numeric")
|
||||
|
||||
# Convert to Decimal for precision
|
||||
self.open = Decimal(str(self.open))
|
||||
self.high = Decimal(str(self.high))
|
||||
self.low = Decimal(str(self.low))
|
||||
self.close = Decimal(str(self.close))
|
||||
self.volume = Decimal(str(self.volume))
|
||||
|
||||
# Validate price relationships
|
||||
if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
|
||||
raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")
|
||||
|
||||
|
||||
def validate_ohlcv_data(data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
|
||||
"""
|
||||
Validate and convert raw OHLCV data to standardized format.
|
||||
|
||||
Args:
|
||||
data: Raw OHLCV data dictionary
|
||||
symbol: Trading symbol
|
||||
timeframe: Timeframe (e.g., '1m', '5m', '1h')
|
||||
|
||||
Returns:
|
||||
Validated OHLCVData object
|
||||
|
||||
Raises:
|
||||
DataValidationError: If data validation fails
|
||||
"""
|
||||
required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
|
||||
|
||||
# Check required fields
|
||||
for field in required_fields:
|
||||
if field not in data:
|
||||
raise DataValidationError(f"Missing required field: {field}")
|
||||
|
||||
try:
|
||||
# Parse timestamp
|
||||
timestamp = data['timestamp']
|
||||
if isinstance(timestamp, (int, float)):
|
||||
# Assume Unix timestamp in milliseconds
|
||||
timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
|
||||
elif isinstance(timestamp, str):
|
||||
timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
|
||||
elif not isinstance(timestamp, datetime):
|
||||
raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")
|
||||
|
||||
return OHLCVData(
|
||||
symbol=symbol,
|
||||
timeframe=timeframe,
|
||||
timestamp=timestamp,
|
||||
open=Decimal(str(data['open'])),
|
||||
high=Decimal(str(data['high'])),
|
||||
low=Decimal(str(data['low'])),
|
||||
close=Decimal(str(data['close'])),
|
||||
volume=Decimal(str(data['volume'])),
|
||||
trades_count=data.get('trades_count')
|
||||
)
|
||||
|
||||
except (ValueError, TypeError, KeyError) as e:
|
||||
raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
|
||||
except Exception as e:
|
||||
# Catch any other exceptions (like Decimal InvalidOperation)
|
||||
raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
|
||||
Reference in New Issue
Block a user