Enhance DataLoader and MinuteDataBuffer for improved data handling

- Added error handling in DataLoader: if reading the CSV file with the default pandas engine fails, a warning is logged and the read is retried with the Python engine (`engine='python'`).
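- Converted the numeric columns (open, high, low, close, volume) in DataLoader from numpy float32 to float64 via `astype(float)` for compatibility; note that the column dtype becomes float64 rather than holding Python float objects.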
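- Updated MinuteDataBuffer validation to accept numpy numeric scalars (`np.number`) in addition to Python int and float; numpy values are converted to Python float in the supplied OHLCV dict so that later processing sees a consistent type.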
This commit is contained in:
Ajasra 2025-05-29 14:21:16 +08:00
parent 790bd9ccdd
commit b0ea701020
2 changed files with 25 additions and 2 deletions

View File

@ -98,7 +98,11 @@ class DataLoader:
}
# Read data with original capitalized column names
data = pd.read_csv(file_path, dtype=dtypes)
try:
data = pd.read_csv(file_path, dtype=dtypes)
except Exception as e:
logger.warning(f"Failed to read CSV with default engine, trying python engine: {e}")
data = pd.read_csv(file_path, dtype=dtypes, engine='python')
# Handle timestamp column
if 'Timestamp' in data.columns:
@ -107,6 +111,13 @@ class DataLoader:
data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= end_date)]
# Convert column names to lowercase
data.columns = data.columns.str.lower()
# Convert numpy float32 to Python float for compatibility
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
for col in numeric_columns:
if col in data.columns:
data[col] = data[col].astype(float)
logger.info(f"CSV data loaded: {len(data)} rows for {start_date} to {end_date}")
return data.set_index('timestamp')
else:
@ -115,6 +126,13 @@ class DataLoader:
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= end_date)]
data.columns = data.columns.str.lower()
# Convert numpy float32 to Python float for compatibility
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
for col in numeric_columns:
if col in data.columns:
data[col] = data[col].astype(float)
logger.info(f"CSV data loaded (first column as timestamp): {len(data)} rows for {start_date} to {end_date}")
return data.set_index('timestamp')

View File

@ -319,8 +319,13 @@ class MinuteDataBuffer:
for field in required_fields:
if field not in ohlcv_data:
raise ValueError(f"Missing required field: {field}")
if not isinstance(ohlcv_data[field], (int, float)):
# Accept both Python numeric types and numpy numeric types
if not isinstance(ohlcv_data[field], (int, float, np.number)):
raise ValueError(f"Field {field} must be numeric, got {type(ohlcv_data[field])}")
# Convert numpy types to Python types to ensure compatibility
if isinstance(ohlcv_data[field], np.number):
ohlcv_data[field] = float(ohlcv_data[field])
# Check timestamp ordering (allow equal timestamps for updates)
if self._last_timestamp is not None and timestamp < self._last_timestamp: