From b0ea701020dc4dce11d7566068008a98fbd8c0d0 Mon Sep 17 00:00:00 2001
From: Ajasra
Date: Thu, 29 May 2025 14:21:16 +0800
Subject: [PATCH] Enhance DataLoader and MinuteDataBuffer for improved data handling

- Added error handling in DataLoader to attempt reading CSV files with a fallback to the Python engine if the default engine fails.
- Converted numpy float32 columns to Python float for compatibility in DataLoader.
- Updated MinuteDataBuffer to accept both Python and numpy numeric types, ensuring consistent data validation and conversion.
---
 IncrementalTrader/backtester/utils.py      | 20 +++++++++++++++++++-
 IncrementalTrader/utils/timeframe_utils.py |  7 ++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/IncrementalTrader/backtester/utils.py b/IncrementalTrader/backtester/utils.py
index a7cd59a..9417673 100644
--- a/IncrementalTrader/backtester/utils.py
+++ b/IncrementalTrader/backtester/utils.py
@@ -98,7 +98,11 @@ class DataLoader:
         }
 
         # Read data with original capitalized column names
-        data = pd.read_csv(file_path, dtype=dtypes)
+        try:
+            data = pd.read_csv(file_path, dtype=dtypes)
+        except Exception as e:
+            logger.warning(f"Failed to read CSV with default engine, trying python engine: {e}")
+            data = pd.read_csv(file_path, dtype=dtypes, engine='python')
 
         # Handle timestamp column
         if 'Timestamp' in data.columns:
@@ -107,6 +111,13 @@
             data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= end_date)]
             # Convert column names to lowercase
             data.columns = data.columns.str.lower()
+
+            # Convert numpy float32 to Python float for compatibility
+            numeric_columns = ['open', 'high', 'low', 'close', 'volume']
+            for col in numeric_columns:
+                if col in data.columns:
+                    data[col] = data[col].astype(float)
+
             logger.info(f"CSV data loaded: {len(data)} rows for {start_date} to {end_date}")
             return data.set_index('timestamp')
         else:
@@ -115,6 +126,13 @@
             data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
             data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= end_date)]
             data.columns = data.columns.str.lower()
+
+            # Convert numpy float32 to Python float for compatibility
+            numeric_columns = ['open', 'high', 'low', 'close', 'volume']
+            for col in numeric_columns:
+                if col in data.columns:
+                    data[col] = data[col].astype(float)
+
             logger.info(f"CSV data loaded (first column as timestamp): {len(data)} rows for {start_date} to {end_date}")
             return data.set_index('timestamp')
 
diff --git a/IncrementalTrader/utils/timeframe_utils.py b/IncrementalTrader/utils/timeframe_utils.py
index 15672a9..b70aa8e 100644
--- a/IncrementalTrader/utils/timeframe_utils.py
+++ b/IncrementalTrader/utils/timeframe_utils.py
@@ -319,8 +319,13 @@ class MinuteDataBuffer:
         for field in required_fields:
             if field not in ohlcv_data:
                 raise ValueError(f"Missing required field: {field}")
-            if not isinstance(ohlcv_data[field], (int, float)):
+            # Accept both Python numeric types and numpy numeric types
+            if not isinstance(ohlcv_data[field], (int, float, np.number)):
                 raise ValueError(f"Field {field} must be numeric, got {type(ohlcv_data[field])}")
+
+            # Convert numpy types to Python types to ensure compatibility
+            if isinstance(ohlcv_data[field], np.number):
+                ohlcv_data[field] = float(ohlcv_data[field])
 
         # Check timestamp ordering (allow equal timestamps for updates)
         if self._last_timestamp is not None and timestamp < self._last_timestamp:
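
Note (not part of the patch): the DataLoader and MinuteDataBuffer hunks above apply the same numpy-to-float normalization pattern. A minimal standalone sketch of that pattern follows for reference; the function name normalize_bar and the sample data are illustrative assumptions, not code from the repository.

    import numpy as np
    import pandas as pd

    def normalize_bar(bar: dict) -> dict:
        """Validate OHLCV fields and coerce numpy numerics to plain Python floats."""
        for field in ('open', 'high', 'low', 'close', 'volume'):
            if field not in bar:
                raise ValueError(f"Missing required field: {field}")
            # Accept both Python and numpy numeric types, mirroring the patched check.
            if not isinstance(bar[field], (int, float, np.number)):
                raise ValueError(f"Field {field} must be numeric, got {type(bar[field])}")
            # Coerce numpy scalars (e.g. np.float32) to Python float.
            if isinstance(bar[field], np.number):
                bar[field] = float(bar[field])
        return bar

    # A float32 row, as produced when the CSV is read with explicit float32 dtypes.
    row = pd.Series({'open': 1.0, 'high': 2.0, 'low': 0.5, 'close': 1.5, 'volume': 100.0},
                    dtype=np.float32)
    bar = normalize_bar(dict(row.items()))
    assert all(type(v) is float for v in bar.values())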