data ingestion
This commit is contained in:
@@ -8,6 +8,7 @@ and trade data aggregation.
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
from utils.timeframe_utils import load_timeframe_options
|
||||
import pandas as pd
|
||||
|
||||
from ..data_types import StandardizedTrade, OHLCVCandle
|
||||
|
||||
@@ -74,8 +75,75 @@ def parse_timeframe(timeframe: str) -> Tuple[int, str]:
|
||||
return number, unit
|
||||
|
||||
|
||||
def resample_candles_to_timeframe(df: pd.DataFrame, target_timeframe: str) -> pd.DataFrame:
    """
    Resample a DataFrame of OHLCV candles to a higher timeframe.

    Args:
        df (pd.DataFrame): Input candles with 'open', 'high', 'low', 'close' and
            'volume' columns, and optionally 'trades_count'. It must either have
            a DatetimeIndex or a 'timestamp' column that ``pd.to_datetime`` can
            convert. The caller's DataFrame is never modified.
        target_timeframe (str): Target timeframe written as a positive integer
            followed by one of the units s, m, h, d (case-insensitive),
            e.g. '5m', '1h', '1d'.

    Returns:
        pd.DataFrame: One row per target-timeframe bucket that contains data,
        indexed by bucket start, with open=first, high=max, low=min,
        close=last, volume=sum and (when present in the input)
        trades_count=sum as integers. An empty input yields an empty DataFrame.

    Raises:
        ValueError: If ``target_timeframe`` does not match ``<int><s|m|h|d>``
            or its multiplier is zero.
    """
    if df.empty:
        return pd.DataFrame()

    # Ensure we work on a datetime index. assign() returns a new frame, so the
    # caller's 'timestamp' column is left exactly as it was passed in.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.assign(timestamp=pd.to_datetime(df['timestamp'])).set_index('timestamp')

    # Map our unit suffixes to pandas offset aliases. The lowercase spellings
    # ('s', 'min', 'h') are the non-deprecated ones in pandas >= 2.2 and are
    # also accepted by older releases; 'D' stays uppercase.
    timeframe_map = {
        's': 's',
        'm': 'min',
        'h': 'h',
        'd': 'D',
    }

    match = re.match(r'^(\d+)([smhd])$', target_timeframe.lower())
    if not match:
        raise ValueError(f"Invalid target timeframe format: {target_timeframe}")

    number = int(match.group(1))
    if number == 0:
        # A zero-length bucket is meaningless; fail here with a clear message
        # instead of letting pandas raise something harder to interpret.
        raise ValueError(f"Target timeframe multiplier must be positive: {target_timeframe}")

    # The regex only admits keys of timeframe_map, so direct indexing is safe.
    resample_freq = f"{number}{timeframe_map[match.group(2)]}"

    # Define how to aggregate each column
    ohlcv_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }

    # Only include 'trades_count' if it exists in the DataFrame
    if 'trades_count' in df.columns:
        ohlcv_dict['trades_count'] = 'sum'

    resampled_df = df.resample(resample_freq).agg(ohlcv_dict)

    # Drop buckets with any missing OHLC value. Buckets that received no input
    # rows come back with NaN prices and are removed here.
    resampled_df = resampled_df.dropna(subset=['open', 'high', 'low', 'close'])

    # Normalise trades_count to integers (any NaN is treated as zero trades).
    if 'trades_count' in resampled_df.columns:
        resampled_df['trades_count'] = resampled_df['trades_count'].fillna(0).astype(int)

    return resampled_df
|
||||
|
||||
|
||||
# Names exported by ``from <this module> import *``.
__all__ = [
    'aggregate_trades_to_candles',
    'validate_timeframe',
    'parse_timeframe',
    'resample_candles_to_timeframe',
]
|
||||
Reference in New Issue
Block a user