Enhance DataLoader and MinuteDataBuffer for improved data handling

- Added error handling in DataLoader: if reading a CSV file with the default engine fails, retry with the Python engine.
- Converted numpy float32 columns to float (via `astype(float)`) in DataLoader for compatibility.
- Updated MinuteDataBuffer to accept both Python and numpy numeric types, ensuring consistent data validation and conversion.
2025-05-29 14:21:16 +08:00
parent 790bd9ccdd
commit b0ea701020
2 changed files with 25 additions and 2 deletions
--- a/IncrementalTrader/backtester/utils.py
+++ b/IncrementalTrader/backtester/utils.py
@@ -98,7 +98,11 @@ class DataLoader:
        }
        
        # Read data with original capitalized column names
-        data = pd.read_csv(file_path, dtype=dtypes)
+        try:
+            data = pd.read_csv(file_path, dtype=dtypes)
+        except Exception as e:
+            logger.warning(f"Failed to read CSV with default engine, trying python engine: {e}")
+            data = pd.read_csv(file_path, dtype=dtypes, engine='python')
        
        # Handle timestamp column
        if 'Timestamp' in data.columns:
@@ -107,6 +111,13 @@ class DataLoader:
            data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= end_date)]
            # Convert column names to lowercase
            data.columns = data.columns.str.lower()
+            
+            # Convert numpy float32 to Python float for compatibility
+            numeric_columns = ['open', 'high', 'low', 'close', 'volume']
+            for col in numeric_columns:
+                if col in data.columns:
+                    data[col] = data[col].astype(float)
+            
            logger.info(f"CSV data loaded: {len(data)} rows for {start_date} to {end_date}")
            return data.set_index('timestamp')
        else:
@@ -115,6 +126,13 @@ class DataLoader:
            data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
            data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= end_date)]
            data.columns = data.columns.str.lower()
+            
+            # Convert numpy float32 to Python float for compatibility
+            numeric_columns = ['open', 'high', 'low', 'close', 'volume']
+            for col in numeric_columns:
+                if col in data.columns:
+                    data[col] = data[col].astype(float)
+            
            logger.info(f"CSV data loaded (first column as timestamp): {len(data)} rows for {start_date} to {end_date}")
            return data.set_index('timestamp')
    
--- a/IncrementalTrader/utils/timeframe_utils.py
+++ b/IncrementalTrader/utils/timeframe_utils.py
@@ -319,8 +319,13 @@ class MinuteDataBuffer:
        for field in required_fields:
            if field not in ohlcv_data:
                raise ValueError(f"Missing required field: {field}")
-            if not isinstance(ohlcv_data[field], (int, float)):
+            # Accept both Python numeric types and numpy numeric types
+            if not isinstance(ohlcv_data[field], (int, float, np.number)):
                raise ValueError(f"Field {field} must be numeric, got {type(ohlcv_data[field])}")
+            
+            # Convert numpy types to Python types to ensure compatibility
+            if isinstance(ohlcv_data[field], np.number):
+                ohlcv_data[field] = float(ohlcv_data[field])
        
        # Check timestamp ordering (allow equal timestamps for updates)
        if self._last_timestamp is not None and timestamp < self._last_timestamp: