aggregate for specific condition
This commit is contained in:
parent
a924328c90
commit
736b278ee2
@ -1,5 +1,80 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
def check_data(data_df: pd.DataFrame) -> "dict[str, str] | bool":
    """
    Validates the input DataFrame and builds its OHLCV aggregation rules.

    Despite the name, this helper does more than validate: on success it
    returns the mapping that callers pass straight to ``Resampler.agg``.

    Args:
        data_df (pd.DataFrame): DataFrame to check. It must have a
            DatetimeIndex and at least one of the columns 'open', 'high',
            'low', 'close', 'volume'.

    Returns:
        dict or bool: A non-empty dict mapping each OHLCV column present in
            ``data_df`` to its aggregation name ('first', 'max', 'min',
            'last', 'sum'), in that column order. ``False`` (after printing
            a warning) when the index is not a DatetimeIndex or when none of
            the OHLCV columns are present, so callers can test the result
            with ``if not agg_rules``.
    """
    if not isinstance(data_df.index, pd.DatetimeIndex):
        print("Warning: Input DataFrame must have a DatetimeIndex.")
        return False

    # Aggregation applied to each standard column; insertion order here
    # fixes the column order of the aggregated output.
    ohlcv_rules = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }
    agg_rules = {
        column: how
        for column, how in ohlcv_rules.items()
        if column in data_df.columns
    }

    if not agg_rules:
        # This helper is shared by every aggregation frequency, so the
        # message must not claim a specific one.
        print("Warning: No standard OHLCV columns (open, high, low, close, volume) found for aggregation.")
        return False

    return agg_rules
|
||||||
|
|
||||||
|
def aggregate_to_weekly(data_df: pd.DataFrame, weeks: int = 1) -> pd.DataFrame:
    """
    Aggregates time-series financial data to weekly OHLCV format.

    The input DataFrame is expected to have a DatetimeIndex.
    'open' will be the first 'open' price of the week.
    'close' will be the last 'close' price of the week.
    'high' will be the maximum 'high' price of the week.
    'low' will be the minimum 'low' price of the week.
    'volume' (if present) will be the sum of volumes for the week.

    Args:
        data_df (pd.DataFrame): DataFrame with a DatetimeIndex and columns
                                like 'open', 'high', 'low', 'close', and optionally 'volume'.
        weeks (int): The number of weeks to aggregate to. Default is 1.

    Returns:
        pd.DataFrame: DataFrame aggregated to weekly OHLCV data.
                      The index will be a DatetimeIndex holding the first day
                      (Monday, 00:00) of each ``weeks``-week bin.
                      Returns an empty DataFrame if the input has no
                      DatetimeIndex or no relevant OHLCV columns are found.
    """
    agg_rules = check_data(data_df)

    if not agg_rules:
        print("Warning: No standard OHLCV columns (open, high, low, close, volume) found for weekly aggregation.")
        return pd.DataFrame(index=pd.to_datetime([]))

    # Resample to weekly frequency and apply aggregation rules.
    # The 'W' alias is anchored on Sunday and, by default, bins are closed
    # and labelled on their right edge, so each row is initially stamped
    # with the Sunday that ends its Monday-to-Sunday span.
    weekly_data = data_df.resample(f'{weeks}W').agg(agg_rules)

    # NOTE(review): when 'volume' is aggregated, 'sum' yields 0 (not NaN) for
    # bins without rows, so this does not remove such bins — confirm intent.
    weekly_data.dropna(how='all', inplace=True)

    # Adjust timestamps to the start of the week.
    # DatetimeIndex.floor() only accepts fixed frequencies and raises
    # ValueError for 'W', so shift the right-edge Sunday label back to the
    # Monday that opens the bin instead.
    if not weekly_data.empty and isinstance(weekly_data.index, pd.DatetimeIndex):
        weekly_data.index = weekly_data.index - pd.DateOffset(days=7 * weeks - 1)

    return weekly_data
|
||||||
|
|
||||||
|
|
||||||
def aggregate_to_daily(data_df: pd.DataFrame) -> pd.DataFrame:
|
def aggregate_to_daily(data_df: pd.DataFrame) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
Aggregates time-series financial data to daily OHLCV format.
|
Aggregates time-series financial data to daily OHLCV format.
|
||||||
@ -24,23 +99,9 @@ def aggregate_to_daily(data_df: pd.DataFrame) -> pd.DataFrame:
|
|||||||
Raises:
|
Raises:
|
||||||
ValueError: If the input DataFrame does not have a DatetimeIndex.
|
ValueError: If the input DataFrame does not have a DatetimeIndex.
|
||||||
"""
|
"""
|
||||||
if not isinstance(data_df.index, pd.DatetimeIndex):
|
|
||||||
raise ValueError("Input DataFrame must have a DatetimeIndex.")
|
|
||||||
|
|
||||||
agg_rules = {}
|
|
||||||
|
|
||||||
# Define aggregation rules based on available columns
|
|
||||||
if 'open' in data_df.columns:
|
|
||||||
agg_rules['open'] = 'first'
|
|
||||||
if 'high' in data_df.columns:
|
|
||||||
agg_rules['high'] = 'max'
|
|
||||||
if 'low' in data_df.columns:
|
|
||||||
agg_rules['low'] = 'min'
|
|
||||||
if 'close' in data_df.columns:
|
|
||||||
agg_rules['close'] = 'last'
|
|
||||||
if 'volume' in data_df.columns:
|
|
||||||
agg_rules['volume'] = 'sum'
|
|
||||||
|
|
||||||
|
agg_rules = check_data(data_df)
|
||||||
|
|
||||||
if not agg_rules:
|
if not agg_rules:
|
||||||
# Log a warning or raise an error if no relevant columns are found
|
# Log a warning or raise an error if no relevant columns are found
|
||||||
# For now, returning an empty DataFrame with a message might be suitable for some cases
|
# For now, returning an empty DataFrame with a message might be suitable for some cases
|
||||||
@ -58,3 +119,43 @@ def aggregate_to_daily(data_df: pd.DataFrame) -> pd.DataFrame:
|
|||||||
daily_data.dropna(how='all', inplace=True)
|
daily_data.dropna(how='all', inplace=True)
|
||||||
|
|
||||||
return daily_data
|
return daily_data
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_to_hourly(data_df: pd.DataFrame, hours: int = 1) -> pd.DataFrame:
    """
    Aggregates time-series financial data to hourly OHLCV format.

    The input DataFrame is expected to have a DatetimeIndex.
    'open' will be the first 'open' price of the hour.
    'close' will be the last 'close' price of the hour.
    'high' will be the maximum 'high' price of the hour.
    'low' will be the minimum 'low' price of the hour.
    'volume' (if present) will be the sum of volumes for the hour.

    Args:
        data_df (pd.DataFrame): DataFrame with a DatetimeIndex and columns
                                like 'open', 'high', 'low', 'close', and optionally 'volume'.
        hours (int): The number of hours to aggregate to. Default is 1.

    Returns:
        pd.DataFrame: DataFrame aggregated to hourly OHLCV data.
                      The index will be a DatetimeIndex with the time set to the start of the hour.
                      Returns an empty DataFrame if the input has no
                      DatetimeIndex or no relevant OHLCV columns are found.
    """
    agg_rules = check_data(data_df)

    if not agg_rules:
        print("Warning: No standard OHLCV columns (open, high, low, close, volume) found for hourly aggregation.")
        return pd.DataFrame(index=pd.to_datetime([]))

    # Resample to hourly frequency and apply aggregation rules.
    # Use the lowercase 'h' alias: the uppercase 'H' spelling is deprecated
    # in recent pandas releases, while 'h' is accepted by older ones too.
    hourly_data = data_df.resample(f'{hours}h').agg(agg_rules)

    # NOTE(review): when 'volume' is aggregated, 'sum' yields 0 (not NaN) for
    # bins without rows, so this does not remove such bins — confirm intent.
    hourly_data.dropna(how='all', inplace=True)

    # Adjust timestamps to the start of the hour
    if not hourly_data.empty and isinstance(hourly_data.index, pd.DatetimeIndex):
        hourly_data.index = hourly_data.index.floor('h')

    return hourly_data
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user