Fix deprecated pandas frequency aliases ('H' -> 'h') and add aggregate_to_minutes

2025-05-22 17:24:16 +08:00
parent 8e220b564c
commit 934c807246
1 changed files with 40 additions and 2 deletions
--- a/cycles/utils/data_utils.py
+++ b/cycles/utils/data_utils.py
@@ -150,12 +150,50 @@ def aggregate_to_hourly(data_df: pd.DataFrame, hours: int = 1) -> pd.DataFrame:
        return pd.DataFrame(index=pd.to_datetime([]))

    # Resample to hourly frequency and apply aggregation rules  
-    hourly_data = data_df.resample(f'{hours}H').agg(agg_rules)
+    hourly_data = data_df.resample(f'{hours}h').agg(agg_rules)

    hourly_data.dropna(how='all', inplace=True)

    # Adjust timestamps to the start of the hour
    if not hourly_data.empty and isinstance(hourly_data.index, pd.DatetimeIndex):
-        hourly_data.index = hourly_data.index.floor('H')
+        hourly_data.index = hourly_data.index.floor('h')

    return hourly_data
+
+
+def aggregate_to_minutes(data_df: pd.DataFrame, minutes: int) -> pd.DataFrame:
+    """
+    Aggregate time-series financial data into N-minute OHLCV bars.
+
+    The per-column aggregation rules are whatever check_data(data_df) returns;
+    that helper is defined outside this function. The intended mapping is first
+    'open', maximum 'high', minimum 'low', last 'close' and the sum of 'volume'
+    (if present) -- TODO confirm against check_data.
+
+    Args:
+        data_df (pd.DataFrame): Frame to resample. Expected to have a
+            DatetimeIndex and columns like 'open', 'high', 'low', 'close',
+            and optionally 'volume'.
+        minutes (int): Bar width in minutes; interpolated into the pandas
+            frequency string f'{minutes}min'.
+
+    Returns:
+        pd.DataFrame: The resampled frame with all-NaN rows removed. If
+            check_data returns a falsy value (presumably no usable OHLCV
+            columns or no DatetimeIndex -- confirm), a DataFrame built on
+            an empty pd.to_datetime([]) index is returned instead.
+    """
+    agg_rules_obj = check_data(data_df) # aggregation rules, or a falsy value
+
+    if not agg_rules_obj:
+        # Input rejected by check_data; return an empty frame with a datetime index.
+        return pd.DataFrame(index=pd.to_datetime([]))
+
+    # Resample into fixed N-minute bins and apply the rules from check_data.
+    resampled_data = data_df.resample(f'{minutes}min').agg(agg_rules_obj)
+    
+    # Drop rows where every column is NaN. NOTE(review): a summed 'volume' is 0,
+    # not NaN, for an empty bin, so such rows may survive -- confirm intent.
+    resampled_data.dropna(how='all', inplace=True)
+
+    return resampled_data