149 lines
4.5 KiB
Python
149 lines
4.5 KiB
Python
"""
|
|
Utility functions for market data aggregation.
|
|
|
|
This module provides common utility functions for working with OHLCV candles
|
|
and trade data aggregation.
|
|
"""
|
|
|
|
import re
|
|
from typing import List, Tuple
|
|
from utils.timeframe_utils import load_timeframe_options
|
|
import pandas as pd
|
|
|
|
from ..data_types import StandardizedTrade, OHLCVCandle
|
|
|
|
|
|
def aggregate_trades_to_candles(trades: List[StandardizedTrade],
                                timeframes: List[str],
                                symbol: str,
                                exchange: str) -> List[OHLCVCandle]:
    """
    Aggregate a list of trades into candles in one batch pass.

    This is a convenience wrapper: it builds a BatchCandleProcessor for the
    given symbol/exchange/timeframes and feeds it the trades as an iterator.

    Args:
        trades: Standardized trades to aggregate
        timeframes: Timeframes to build candles for
        symbol: Trading symbol handed to the batch processor
        exchange: Exchange name handed to the batch processor

    Returns:
        The candles returned by BatchCandleProcessor.process_trades_to_candles
    """
    # Imported inside the function, so the batch module is only loaded when
    # this helper is actually called.
    from .batch import BatchCandleProcessor

    batch_processor = BatchCandleProcessor(symbol, exchange, timeframes)
    trade_stream = iter(trades)
    return batch_processor.process_trades_to_candles(trade_stream)
|
|
|
|
|
|
def validate_timeframe(timeframe: str) -> bool:
    """
    Check whether a timeframe is one of the configured options.

    Args:
        timeframe: Timeframe string (e.g., '1s', '5s', '10s', '1m', '5m', '1h')

    Returns:
        True if the timeframe equals the 'value' of any entry returned by
        load_timeframe_options(), False otherwise
    """
    # Collect every option's 'value' first, then test membership.
    supported_values = []
    for option in load_timeframe_options():
        supported_values.append(option['value'])
    return timeframe in supported_values
|
|
|
|
|
|
def parse_timeframe(timeframe: str) -> Tuple[int, str]:
    """
    Parse timeframe string into number and unit.

    The input is lower-cased before matching, and the whole string must
    consist of one or more digits followed by exactly one unit letter
    (s, m, h or d).

    Args:
        timeframe: Timeframe string (e.g., '1s', '5m', '1h')

    Returns:
        Tuple of (number, unit), where unit is the lower-case unit letter

    Raises:
        ValueError: If the string is not of the form <digits><unit>, or if
            the number is zero

    Examples:
        '1s' -> (1, 's')
        '5m' -> (5, 'm')
        '1h' -> (1, 'h')
        '1d' -> (1, 'd')
    """
    # fullmatch rather than match() with a '$' anchor: '$' also matches just
    # before a trailing newline, which would let "1m\n" through as valid.
    match = re.fullmatch(r'(\d+)([smhd])', timeframe.lower())
    if not match:
        raise ValueError(f"Invalid timeframe format: {timeframe}")

    number = int(match.group(1))
    unit = match.group(2)

    # Disallow zero timeframes, as they are not meaningful for bucket intervals
    # (the pattern already excludes a leading minus sign).
    if number <= 0:
        raise ValueError(f"Timeframe value must be positive: {timeframe}")

    return number, unit
|
|
|
|
|
|
def resample_candles_to_timeframe(df: pd.DataFrame, target_timeframe: str) -> pd.DataFrame:
    """
    Resamples a DataFrame of OHLCV candles to a higher timeframe.

    The input DataFrame is never modified; when a 'timestamp' column has to be
    converted and promoted to the index, that happens on a copy.

    Args:
        df (pd.DataFrame): Input DataFrame with a datetime index (or a 'timestamp' column that
            can be converted with pd.to_datetime) and 'open', 'high', 'low', 'close', 'volume',
            and optionally 'trades_count' columns.
        target_timeframe (str): The target timeframe for resampling (e.g., '1h', '1d').
            Case-insensitive; format is <positive integer><s|m|h|d>.

    Returns:
        pd.DataFrame: Resampled DataFrame with OHLCV data for the target timeframe.
            Buckets without any candles are dropped. An empty input yields an
            empty DataFrame.

    Raises:
        ValueError: If target_timeframe is malformed or its count is zero.
        KeyError: If the index is not a DatetimeIndex and there is no 'timestamp' column.
    """
    if df.empty:
        return pd.DataFrame()

    # Ensure the DataFrame index is a datetime index.
    if not isinstance(df.index, pd.DatetimeIndex):
        # assign() returns a new frame, so the caller's 'timestamp' column is
        # not overwritten as a side effect of resampling.
        df = df.assign(timestamp=pd.to_datetime(df['timestamp'])).set_index('timestamp')

    # Timeframe unit -> pandas offset alias. The lower-case 's'/'min'/'h'
    # spellings are used because the 'S'/'T'/'H' aliases are deprecated in
    # recent pandas releases.
    unit_to_pandas_alias = {
        's': 's',
        'm': 'min',
        'h': 'h',
        'd': 'D',
    }

    # fullmatch so that trailing newlines or other junk are rejected.
    match = re.fullmatch(r'(\d+)([smhd])', target_timeframe.lower())
    if not match:
        raise ValueError(f"Invalid target timeframe format: {target_timeframe}")

    number = int(match.group(1))
    # A zero-length bucket is meaningless; fail with a clear message instead of
    # passing "0..." through to pandas.
    if number <= 0:
        raise ValueError(f"Timeframe value must be positive: {target_timeframe}")

    resample_freq = f"{number}{unit_to_pandas_alias[match.group(2)]}"

    # Define how to aggregate each column.
    ohlcv_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }

    # Only include 'trades_count' if it exists in the DataFrame.
    if 'trades_count' in df.columns:
        ohlcv_dict['trades_count'] = 'sum'

    # Resample the data.
    resampled_df = df.resample(resample_freq).agg(ohlcv_dict)

    # Drop buckets that contain no candles (any of open/high/low/close is NaN).
    resampled_df.dropna(subset=['open', 'high', 'low', 'close'], inplace=True)

    # Fill NaN trades_count with 0 after resampling and force an integer dtype.
    if 'trades_count' in resampled_df.columns:
        resampled_df['trades_count'] = resampled_df['trades_count'].fillna(0).astype(int)

    return resampled_df
|
|
|
|
|
|
# Public API of this module: the names exported by `from <module> import *`.
__all__ = [
    "aggregate_trades_to_candles",
    "validate_timeframe",
    "parse_timeframe",
    "resample_candles_to_timeframe",
]