TCPDashboard/data/common/ohlcv_data.py
Vasily.onl 33f2110f19 Refactor data module to enhance modularity and maintainability
- Extracted `OHLCVData` and validation logic into a new `common/ohlcv_data.py` module, promoting better organization and reusability.
- Updated `BaseDataCollector` to utilize the new `validate_ohlcv_data` function for improved data validation, enhancing code clarity and maintainability.
- Refactored imports in `data/__init__.py` to reflect the new structure, ensuring consistent access to common data types and exceptions.
- Removed redundant data validation logic from `BaseDataCollector`, streamlining its responsibilities.
- Added unit tests for `OHLCVData` and validation functions to ensure correctness and reliability.

These changes improve the architecture of the data module, aligning with project standards for maintainability and performance.
2025-06-10 12:04:58 +08:00

105 lines
3.7 KiB
Python

"""
OHLCV data structure and validation utilities.
This module provides standardized OHLCV (Open, High, Low, Close, Volume) data
structures and validation functions for financial market data.
"""
from dataclasses import dataclass
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, Any, Optional
class DataValidationError(Exception):
"""Exception raised when OHLCV data validation fails."""
pass
@dataclass
class OHLCVData:
"""OHLCV (Open, High, Low, Close, Volume) data structure."""
symbol: str
timeframe: str
timestamp: datetime
open: Decimal
high: Decimal
low: Decimal
close: Decimal
volume: Decimal
trades_count: Optional[int] = None
def __post_init__(self):
"""Validate OHLCV data after initialization."""
if not self.timestamp.tzinfo:
self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)
# Validate price data
if not all(isinstance(price, (Decimal, float, int)) for price in [self.open, self.high, self.low, self.close]):
raise DataValidationError("All OHLCV prices must be numeric")
if not isinstance(self.volume, (Decimal, float, int)):
raise DataValidationError("Volume must be numeric")
# Convert to Decimal for precision
self.open = Decimal(str(self.open))
self.high = Decimal(str(self.high))
self.low = Decimal(str(self.low))
self.close = Decimal(str(self.close))
self.volume = Decimal(str(self.volume))
# Validate price relationships
if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")
def validate_ohlcv_data(data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
"""
Validate and convert raw OHLCV data to standardized format.
Args:
data: Raw OHLCV data dictionary
symbol: Trading symbol
timeframe: Timeframe (e.g., '1m', '5m', '1h')
Returns:
Validated OHLCVData object
Raises:
DataValidationError: If data validation fails
"""
required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
# Check required fields
for field in required_fields:
if field not in data:
raise DataValidationError(f"Missing required field: {field}")
try:
# Parse timestamp
timestamp = data['timestamp']
if isinstance(timestamp, (int, float)):
# Assume Unix timestamp in milliseconds
timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
elif isinstance(timestamp, str):
timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
elif not isinstance(timestamp, datetime):
raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")
return OHLCVData(
symbol=symbol,
timeframe=timeframe,
timestamp=timestamp,
open=Decimal(str(data['open'])),
high=Decimal(str(data['high'])),
low=Decimal(str(data['low'])),
close=Decimal(str(data['close'])),
volume=Decimal(str(data['volume'])),
trades_count=data.get('trades_count')
)
except (ValueError, TypeError, KeyError) as e:
raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
except Exception as e:
# Catch any other exceptions (like Decimal InvalidOperation)
raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")