Bollinger Band and RSI implementation

Ajasra
2025-05-20 18:28:53 +08:00
parent 837c505828
commit 08c871e05a
6 changed files with 415 additions and 9 deletions
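
Note: only two of the six changed files are reproduced below, and neither contains the indicator code itself. For reference, a minimal pandas sketch of the standard formulas named in the title (function names and parameters here are illustrative, not taken from this commit):

import pandas as pd

def bollinger_bands(close: pd.Series, window: int = 20, num_std: float = 2.0) -> pd.DataFrame:
    # Middle band is a rolling mean; upper/lower bands sit num_std rolling
    # standard deviations above/below it.
    mid = close.rolling(window).mean()
    std = close.rolling(window).std()
    return pd.DataFrame({'bb_mid': mid,
                         'bb_upper': mid + num_std * std,
                         'bb_lower': mid - num_std * std})

def rsi(close: pd.Series, period: int = 14) -> pd.Series:
    # Wilder's RSI: 100 - 100 / (1 + avg_gain / avg_loss),
    # with exponential (Wilder) smoothing of gains and losses.
    delta = close.diff()
    avg_gain = delta.clip(lower=0).ewm(alpha=1 / period, adjust=False).mean()
    avg_loss = (-delta.clip(upper=0)).ewm(alpha=1 / period, adjust=False).mean()
    return 100 - 100 / (1 + avg_gain / avg_loss)

The two diffs shown below add a daily OHLCV aggregation helper and rework the Storage load/save methods.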


@@ -0,0 +1,60 @@
import pandas as pd


def aggregate_to_daily(data_df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregates time-series financial data to daily OHLCV format.

    The input DataFrame is expected to have a DatetimeIndex.
    'open' will be the first 'open' price of the day.
    'close' will be the last 'close' price of the day.
    'high' will be the maximum 'high' price of the day.
    'low' will be the minimum 'low' price of the day.
    'volume' (if present) will be the sum of volumes for the day.

    Args:
        data_df (pd.DataFrame): DataFrame with a DatetimeIndex and columns
            like 'open', 'high', 'low', 'close', and optionally 'volume'.
            Column names are expected to be lowercase.

    Returns:
        pd.DataFrame: DataFrame aggregated to daily OHLCV data.
            The index will be a DatetimeIndex with the time set to noon
            (12:00:00) for each day.
            Returns an empty DataFrame if no relevant OHLCV columns are found.

    Raises:
        ValueError: If the input DataFrame does not have a DatetimeIndex.
    """
    if not isinstance(data_df.index, pd.DatetimeIndex):
        raise ValueError("Input DataFrame must have a DatetimeIndex.")
    # Define aggregation rules based on available columns
    agg_rules = {}
    if 'open' in data_df.columns:
        agg_rules['open'] = 'first'
    if 'high' in data_df.columns:
        agg_rules['high'] = 'max'
    if 'low' in data_df.columns:
        agg_rules['low'] = 'min'
    if 'close' in data_df.columns:
        agg_rules['close'] = 'last'
    if 'volume' in data_df.columns:
        agg_rules['volume'] = 'sum'
    if not agg_rules:
        # No relevant columns found: warn and return an empty DataFrame with a
        # DatetimeIndex so callers get a consistent type back.
        print("Warning: No standard OHLCV columns (open, high, low, close, volume) found for daily aggregation.")
        return pd.DataFrame(index=pd.to_datetime([]))
    # Resample to daily frequency and apply aggregation rules
    daily_data = data_df.resample('D').agg(agg_rules)
    # Adjust timestamps to noon if data exists
    if not daily_data.empty and isinstance(daily_data.index, pd.DatetimeIndex):
        daily_data.index = daily_data.index + pd.Timedelta(hours=12)
    # Remove rows where all values are NaN (days with no trades in the original data)
    daily_data.dropna(how='all', inplace=True)
    return daily_data
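
A quick usage sketch for aggregate_to_daily (synthetic minute bars; the lowercase column names are the ones the docstring requires):

import numpy as np
import pandas as pd

idx = pd.date_range('2025-05-19 09:00', periods=2 * 24 * 60, freq='min')
close = 100 + np.cumsum(np.random.randn(len(idx)) * 0.05)
bars = pd.DataFrame({'open': close, 'high': close + 0.1,
                     'low': close - 0.1, 'close': close,
                     'volume': np.random.randint(1, 100, len(idx))}, index=idx)

daily = aggregate_to_daily(bars)
# One row per calendar day, indexed at 12:00:00, with
# open=first, high=max, low=min, close=last, volume=sum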


@@ -57,20 +57,75 @@ class Storage:
            }
            # Read data with original capitalized column names
            data = pd.read_csv(os.path.join(self.data_dir, file_path), dtype=dtypes)
            if 'Timestamp' in data.columns:
                data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
                # Filter by date range
                data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
                # Now convert column names to lowercase
                data.columns = data.columns.str.lower()
                if self.logging is not None:
                    self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
                return data.set_index('timestamp')
            else:  # Attempt to use the first column if 'Timestamp' is not present
                data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
                data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
                data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= stop_date)]
                data.columns = data.columns.str.lower()  # Ensure all other columns are lowercase
                if self.logging is not None:
                    self.logging.info(f"Data loaded from {file_path} (using first column as timestamp) for date range {start_date} to {stop_date}")
                return data.set_index('timestamp')
        except Exception as e:
            if self.logging is not None:
                self.logging.error(f"Error loading data from {file_path}: {e}")
            # Return an empty DataFrame with a DatetimeIndex
            return pd.DataFrame(index=pd.to_datetime([]))
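    # Illustrative call (constructor and argument types assumed, not shown in this diff):
    #   storage = Storage(data_dir='data')
    #   df = storage.load_data('ohlcv.csv', start_date, stop_date)
    #   if df.empty: ...   # load failures now yield an empty frame instead of None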
    def save_data(self, data: pd.DataFrame, file_path: str):
        """Save processed data to a CSV file.

        If the DataFrame has a DatetimeIndex, it is converted to float Unix
        timestamps (seconds since epoch) before saving. The index is saved
        as a column named 'timestamp'.

        Args:
            data (pd.DataFrame): data to save.
            file_path (str): path to the data file relative to the data_dir.
        """
        data_to_save = data.copy()
        if isinstance(data_to_save.index, pd.DatetimeIndex):
            # Convert DatetimeIndex to Unix timestamp (float seconds since epoch)
            # and make it a column named 'timestamp'.
            data_to_save['timestamp'] = data_to_save.index.astype('int64') / 1e9
            # Reset the index so the old DatetimeIndex is not written as a column.
            data_to_save.reset_index(drop=True, inplace=True)
            # Ensure 'timestamp' is the first column if other columns exist.
            if 'timestamp' in data_to_save.columns and len(data_to_save.columns) > 1:
                cols = ['timestamp'] + [col for col in data_to_save.columns if col != 'timestamp']
                data_to_save = data_to_save[cols]
        elif pd.api.types.is_numeric_dtype(data_to_save.index.dtype):
            # If the index is already numeric (e.g. float Unix timestamps from a
            # previous save/load cycle), make it a column named 'timestamp'.
            data_to_save['timestamp'] = data_to_save.index
            data_to_save.reset_index(drop=True, inplace=True)
            if 'timestamp' in data_to_save.columns and len(data_to_save.columns) > 1:
                cols = ['timestamp'] + [col for col in data_to_save.columns if col != 'timestamp']
                data_to_save = data_to_save[cols]
        else:
            # Other index types are left as is; since to_csv below is called with
            # index=False, such an index is simply not written out. This branch may
            # be removed if we strictly expect a DatetimeIndex or a numeric index.
            pass
        # Save to CSV, writing the 'timestamp' column (if created) instead of the active index.
        full_path = os.path.join(self.data_dir, file_path)
        data_to_save.to_csv(full_path, index=False)  # index=False because timestamp is now a column
        if self.logging is not None:
            self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")
    def format_row(self, row):
        """Format a row for a combined results CSV file
        Args: