2025-06-13 18:01:10 +08:00

149 lines
4.5 KiB
Python

"""
Utility functions for market data aggregation.
This module provides common utility functions for working with OHLCV candles
and trade data aggregation.
"""
import re
from typing import List, Tuple
from utils.timeframe_utils import load_timeframe_options
import pandas as pd
from ..data_types import StandardizedTrade, OHLCVCandle
def aggregate_trades_to_candles(trades: List[StandardizedTrade],
                                timeframes: List[str],
                                symbol: str,
                                exchange: str) -> List[OHLCVCandle]:
    """
    Build candles from an in-memory list of trades in a single call.

    Convenience wrapper: creates a ``BatchCandleProcessor`` for the given
    symbol, exchange and timeframes, then hands it the trades as an iterator.

    Args:
        trades: Standardized trades to aggregate.
        timeframes: Timeframe strings to generate candles for.
        symbol: Trading symbol, forwarded to the processor.
        exchange: Exchange name, forwarded to the processor.

    Returns:
        The result of ``BatchCandleProcessor.process_trades_to_candles`` for
        these trades (annotated as a list of candles).
    """
    # Function-scope import, kept local as in the original
    # (presumably to avoid a circular import with .batch -- confirm).
    from .batch import BatchCandleProcessor

    batch_processor = BatchCandleProcessor(symbol, exchange, timeframes)
    trade_stream = iter(trades)
    return batch_processor.process_trades_to_candles(trade_stream)
def validate_timeframe(timeframe: str) -> bool:
    """
    Report whether a timeframe string is among the configured options.

    Args:
        timeframe: Timeframe string to look up (e.g., '1s', '5m', '1h').

    Returns:
        True if ``timeframe`` equals the 'value' entry of one of the options
        returned by ``load_timeframe_options()``, False otherwise.
    """
    # Collect every option value first, then test membership.
    allowed_values = []
    for option in load_timeframe_options():
        allowed_values.append(option['value'])

    is_supported = timeframe in allowed_values
    return is_supported
def parse_timeframe(timeframe: str) -> Tuple[int, str]:
    """
    Parse a timeframe string into its numeric count and unit letter.

    The input is lower-cased before matching, so '1H' and '1h' are
    equivalent. The whole string must consist of digits followed by exactly
    one unit letter; surrounding whitespace (including a trailing newline)
    is rejected.

    Args:
        timeframe: Timeframe string (e.g., '1s', '5m', '1h')

    Returns:
        Tuple of (number, unit), where unit is one of 's', 'm', 'h', 'd'.

    Raises:
        ValueError: If the string is not digits followed by one of s/m/h/d,
            or if the numeric part is zero.

    Examples:
        '1s' -> (1, 's')
        '5m' -> (5, 'm')
        '1h' -> (1, 'h')
        '1d' -> (1, 'd')
    """
    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which would let '5m\n' through.
    match = re.fullmatch(r'(\d+)([smhd])', timeframe.lower())
    if match is None:
        raise ValueError(f"Invalid timeframe format: {timeframe}")

    number = int(match.group(1))
    unit = match.group(2)

    # A zero-length interval is not meaningful as a bucket size.
    if number <= 0:
        raise ValueError(f"Timeframe value must be positive: {timeframe}")

    return number, unit
def resample_candles_to_timeframe(df: pd.DataFrame, target_timeframe: str) -> pd.DataFrame:
    """
    Resample a DataFrame of OHLCV candles to a coarser timeframe.

    Each target bucket takes the first 'open', max 'high', min 'low',
    last 'close' and summed 'volume' (and summed 'trades_count' when that
    column is present). Buckets with no price data are dropped. Columns
    other than these are not carried into the result. The input DataFrame
    is never modified.

    Args:
        df (pd.DataFrame): Candles with 'open', 'high', 'low', 'close',
            'volume' and optionally 'trades_count' columns. Either indexed
            by a DatetimeIndex, or carrying a 'timestamp' column that
            ``pd.to_datetime`` can parse.
        target_timeframe (str): Target timeframe as digits plus one of
            s/m/h/d (e.g., '5m', '1h', '1d'); case-insensitive.

    Returns:
        pd.DataFrame: Resampled candles indexed by bucket start time. An
        empty input yields an empty ``pd.DataFrame()`` without validating
        ``target_timeframe``.

    Raises:
        ValueError: If ``target_timeframe`` is malformed or its count is zero.
    """
    if df.empty:
        return pd.DataFrame()

    # Resampling needs a datetime index. Build a new frame rather than
    # assigning into ``df`` so the caller's DataFrame is left untouched.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.assign(timestamp=pd.to_datetime(df['timestamp'])).set_index('timestamp')

    # Timeframe unit -> pandas offset alias. The older 'T'/'H'/'S' spellings
    # are deprecated since pandas 2.2; 'min'/'h'/'s' are the current forms.
    unit_to_alias = {
        's': 's',
        'm': 'min',
        'h': 'h',
        'd': 'D',
    }

    # fullmatch rejects trailing newlines that '^...$' with match() accepts.
    match = re.fullmatch(r'(\d+)([smhd])', target_timeframe.lower())
    if match is None:
        raise ValueError(f"Invalid target timeframe format: {target_timeframe}")

    count = int(match.group(1))
    if count <= 0:
        raise ValueError(f"Timeframe value must be positive: {target_timeframe}")

    resample_freq = f"{count}{unit_to_alias[match.group(2)]}"

    # How each column collapses within one target bucket.
    aggregations = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }
    if 'trades_count' in df.columns:
        aggregations['trades_count'] = 'sum'

    resampled_df = df.resample(resample_freq).agg(aggregations)

    # Empty buckets come back with NaN prices; drop them so gaps in the
    # source data do not turn into placeholder candles.
    resampled_df.dropna(subset=['open', 'high', 'low', 'close'], inplace=True)

    if 'trades_count' in resampled_df.columns:
        resampled_df['trades_count'] = resampled_df['trades_count'].fillna(0).astype(int)

    return resampled_df
# Public API of this module: the names exported by a star-import.
__all__ = [
'aggregate_trades_to_candles',
'validate_timeframe',
'parse_timeframe',
'resample_candles_to_timeframe'
]