From b0ea701020dc4dce11d7566068008a98fbd8c0d0 Mon Sep 17 00:00:00 2001
From: Ajasra
Date: Thu, 29 May 2025 14:21:16 +0800
Subject: [PATCH] Enhance DataLoader and MinuteDataBuffer for improved data handling

- Added error handling in DataLoader to attempt reading CSV files with a fallback to the Python engine if the default engine fails.
- Converted numpy float32 columns to Python float for compatibility in DataLoader.
- Updated MinuteDataBuffer to accept both Python and numpy numeric types, ensuring consistent data validation and conversion.
---
 IncrementalTrader/backtester/utils.py      | 20 +++++++++++++++++++-
 IncrementalTrader/utils/timeframe_utils.py |  7 ++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/IncrementalTrader/backtester/utils.py b/IncrementalTrader/backtester/utils.py
index a7cd59a..9417673 100644
--- a/IncrementalTrader/backtester/utils.py
+++ b/IncrementalTrader/backtester/utils.py
@@ -98,7 +98,11 @@ class DataLoader:
         }
 
         # Read data with original capitalized column names
-        data = pd.read_csv(file_path, dtype=dtypes)
+        try:
+            data = pd.read_csv(file_path, dtype=dtypes)
+        except Exception as e:
+            logger.warning(f"Failed to read CSV with default engine, trying python engine: {e}")
+            data = pd.read_csv(file_path, dtype=dtypes, engine='python')
 
         # Handle timestamp column
         if 'Timestamp' in data.columns:
@@ -107,6 +111,13 @@
             data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= end_date)]
             # Convert column names to lowercase
             data.columns = data.columns.str.lower()
+
+            # Convert numpy float32 to Python float for compatibility
+            numeric_columns = ['open', 'high', 'low', 'close', 'volume']
+            for col in numeric_columns:
+                if col in data.columns:
+                    data[col] = data[col].astype(float)
+
             logger.info(f"CSV data loaded: {len(data)} rows for {start_date} to {end_date}")
             return data.set_index('timestamp')
         else:
@@ -115,6 +126,13 @@
             data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
             data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= end_date)]
             data.columns = data.columns.str.lower()
+
+            # Convert numpy float32 to Python float for compatibility
+            numeric_columns = ['open', 'high', 'low', 'close', 'volume']
+            for col in numeric_columns:
+                if col in data.columns:
+                    data[col] = data[col].astype(float)
+
             logger.info(f"CSV data loaded (first column as timestamp): {len(data)} rows for {start_date} to {end_date}")
             return data.set_index('timestamp')
 
diff --git a/IncrementalTrader/utils/timeframe_utils.py b/IncrementalTrader/utils/timeframe_utils.py
index 15672a9..b70aa8e 100644
--- a/IncrementalTrader/utils/timeframe_utils.py
+++ b/IncrementalTrader/utils/timeframe_utils.py
@@ -319,8 +319,13 @@ class MinuteDataBuffer:
         for field in required_fields:
             if field not in ohlcv_data:
                 raise ValueError(f"Missing required field: {field}")
-            if not isinstance(ohlcv_data[field], (int, float)):
+            # Accept both Python numeric types and numpy numeric types
+            if not isinstance(ohlcv_data[field], (int, float, np.number)):
                 raise ValueError(f"Field {field} must be numeric, got {type(ohlcv_data[field])}")
+
+            # Convert numpy types to Python types to ensure compatibility
+            if isinstance(ohlcv_data[field], np.number):
+                ohlcv_data[field] = float(ohlcv_data[field])
 
         # Check timestamp ordering (allow equal timestamps for updates)
         if self._last_timestamp is not None and timestamp < self._last_timestamp:
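
Note (not part of the patch): the DataLoader and MinuteDataBuffer hunks above apply the same numpy-to-float normalization pattern. A minimal standalone sketch of that pattern follows for reference; the function name normalize_bar and the sample data are illustrative assumptions, not code from the repository.

    import numpy as np
    import pandas as pd

    def normalize_bar(bar: dict) -> dict:
        """Validate OHLCV fields and coerce numpy numerics to plain Python floats."""
        for field in ('open', 'high', 'low', 'close', 'volume'):
            if field not in bar:
                raise ValueError(f"Missing required field: {field}")
            # Accept both Python and numpy numeric types, mirroring the patched check.
            if not isinstance(bar[field], (int, float, np.number)):
                raise ValueError(f"Field {field} must be numeric, got {type(bar[field])}")
            # Coerce numpy scalars (e.g. np.float32) to Python float.
            if isinstance(bar[field], np.number):
                bar[field] = float(bar[field])
        return bar

    # A float32 row, as produced when the CSV is read with explicit float32 dtypes.
    row = pd.Series({'open': 1.0, 'high': 2.0, 'low': 0.5, 'close': 1.5, 'volume': 100.0},
                    dtype=np.float32)
    bar = normalize_bar(dict(row.items()))
    assert all(type(v) is float for v in bar.values())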