Enhance DataLoader and MinuteDataBuffer for improved data handling
- DataLoader: if reading the CSV with the default parser engine fails, log a warning and retry with the Python engine.
- DataLoader: cast the float32 OHLCV columns (open, high, low, close, volume) to float64 via `astype(float)` for compatibility with code that expects Python floats.
- MinuteDataBuffer: accept both Python and numpy numeric types during validation, and convert numpy values to Python float so stored data is consistent.
This commit is contained in:
parent
790bd9ccdd
commit
b0ea701020
@ -98,7 +98,11 @@ class DataLoader:
|
||||
}
|
||||
|
||||
# Read data with original capitalized column names
|
||||
data = pd.read_csv(file_path, dtype=dtypes)
|
||||
try:
|
||||
data = pd.read_csv(file_path, dtype=dtypes)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to read CSV with default engine, trying python engine: {e}")
|
||||
data = pd.read_csv(file_path, dtype=dtypes, engine='python')
|
||||
|
||||
# Handle timestamp column
|
||||
if 'Timestamp' in data.columns:
|
||||
@ -107,6 +111,13 @@ class DataLoader:
|
||||
data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= end_date)]
|
||||
# Convert column names to lowercase
|
||||
data.columns = data.columns.str.lower()
|
||||
|
||||
# Convert numpy float32 to Python float for compatibility
|
||||
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
|
||||
for col in numeric_columns:
|
||||
if col in data.columns:
|
||||
data[col] = data[col].astype(float)
|
||||
|
||||
logger.info(f"CSV data loaded: {len(data)} rows for {start_date} to {end_date}")
|
||||
return data.set_index('timestamp')
|
||||
else:
|
||||
@ -115,6 +126,13 @@ class DataLoader:
|
||||
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
|
||||
data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= end_date)]
|
||||
data.columns = data.columns.str.lower()
|
||||
|
||||
# Convert numpy float32 to Python float for compatibility
|
||||
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
|
||||
for col in numeric_columns:
|
||||
if col in data.columns:
|
||||
data[col] = data[col].astype(float)
|
||||
|
||||
logger.info(f"CSV data loaded (first column as timestamp): {len(data)} rows for {start_date} to {end_date}")
|
||||
return data.set_index('timestamp')
|
||||
|
||||
|
||||
@ -319,8 +319,13 @@ class MinuteDataBuffer:
|
||||
for field in required_fields:
|
||||
if field not in ohlcv_data:
|
||||
raise ValueError(f"Missing required field: {field}")
|
||||
if not isinstance(ohlcv_data[field], (int, float)):
|
||||
# Accept both Python numeric types and numpy numeric types
|
||||
if not isinstance(ohlcv_data[field], (int, float, np.number)):
|
||||
raise ValueError(f"Field {field} must be numeric, got {type(ohlcv_data[field])}")
|
||||
|
||||
# Convert numpy types to Python types to ensure compatibility
|
||||
if isinstance(ohlcv_data[field], np.number):
|
||||
ohlcv_data[field] = float(ohlcv_data[field])
|
||||
|
||||
# Check timestamp ordering (allow equal timestamps for updates)
|
||||
if self._last_timestamp is not None and timestamp < self._last_timestamp:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user