From 934c807246526c380adeaf66a8ff069096f91503 Mon Sep 17 00:00:00 2001 From: Ajasra Date: Thu, 22 May 2025 17:24:16 +0800 Subject: [PATCH] fixed deprecated parameters --- cycles/utils/data_utils.py | 42 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/cycles/utils/data_utils.py b/cycles/utils/data_utils.py index e691a07..68e53c4 100644 --- a/cycles/utils/data_utils.py +++ b/cycles/utils/data_utils.py @@ -150,12 +150,50 @@ def aggregate_to_hourly(data_df: pd.DataFrame, hours: int = 1) -> pd.DataFrame: return pd.DataFrame(index=pd.to_datetime([])) # Resample to hourly frequency and apply aggregation rules - hourly_data = data_df.resample(f'{hours}H').agg(agg_rules) + hourly_data = data_df.resample(f'{hours}h').agg(agg_rules) hourly_data.dropna(how='all', inplace=True) # Adjust timestamps to the start of the hour if not hourly_data.empty and isinstance(hourly_data.index, pd.DatetimeIndex): - hourly_data.index = hourly_data.index.floor('H') + hourly_data.index = hourly_data.index.floor('h') return hourly_data + + +def aggregate_to_minutes(data_df: pd.DataFrame, minutes: int) -> pd.DataFrame: + """ + Aggregates time-series financial data to N-minute OHLCV format. + + The input DataFrame is expected to have a DatetimeIndex. + 'open' will be the first 'open' price of the N-minute interval. + 'close' will be the last 'close' price of the N-minute interval. + 'high' will be the maximum 'high' price of the N-minute interval. + 'low' will be the minimum 'low' price of the N-minute interval. + 'volume' (if present) will be the sum of volumes for the N-minute interval. + + Args: + data_df (pd.DataFrame): DataFrame with a DatetimeIndex and columns + like 'open', 'high', 'low', 'close', and optionally 'volume'. + minutes (int): The number of minutes to aggregate to. + + Returns: + pd.DataFrame: DataFrame aggregated to N-minute OHLCV data. + The index will be a DatetimeIndex.
+ Returns an empty DataFrame if no relevant OHLCV columns are found or + if the input DataFrame does not have a DatetimeIndex. + """ + agg_rules_obj = check_data(data_df) # check_data returns rules or False + + if not agg_rules_obj: + # check_data already prints a warning if index is not DatetimeIndex or no OHLCV columns + # Ensure an empty DataFrame with a DatetimeIndex is returned for consistency + return pd.DataFrame(index=pd.to_datetime([])) + + # Resample to N-minute frequency and apply aggregation rules + # Using .agg(agg_rules_obj) where agg_rules_obj is the dict from check_data + resampled_data = data_df.resample(f'{minutes}min').agg(agg_rules_obj) + + resampled_data.dropna(how='all', inplace=True) + + return resampled_data