Refactor technical indicators to return DataFrames and enhance documentation

- Updated all technical indicators to return pandas DataFrames instead of lists of `IndicatorResult` objects, improving consistency and usability.
- Modified the `calculate` method in `TechnicalIndicators` to directly return DataFrames with relevant indicator values.
- Updated `data_integration.py` to use the new DataFrame outputs for better integration with charting.
- Updated documentation to reflect the new DataFrame-centric approach, including usage examples and output structures.
- Improved error handling so that an empty DataFrame is returned when there is not enough data to compute an indicator.

These changes streamline the indicator calculations and improve the overall architecture, aligning with project standards for maintainability and performance.
This commit is contained in:
Vasily.onl
2025-06-09 16:28:16 +08:00
parent fc3cac24bd
commit ec8f5514bb
14 changed files with 542 additions and 542 deletions

View File

@@ -94,138 +94,122 @@ class TestTechnicalIndicators:
def test_prepare_dataframe(self, indicators, sample_candles):
"""Test DataFrame preparation from OHLCV candles."""
df = indicators.prepare_dataframe(sample_candles)
df = indicators._prepare_dataframe_from_list(sample_candles)
assert not df.empty
assert len(df) == len(sample_candles)
assert list(df.columns) == ['symbol', 'timeframe', 'open', 'high', 'low', 'close', 'volume', 'trade_count']
assert list(df.columns) == ['symbol', 'timeframe', 'open', 'high', 'low', 'close', 'volume', 'trade_count', 'timestamp']
assert df.index.name == 'timestamp'
# Check that timestamps are sorted
assert df.index.is_monotonic_increasing
def test_prepare_dataframe_empty(self, indicators):
"""Test DataFrame preparation with empty candles list."""
df = indicators.prepare_dataframe([])
df = indicators._prepare_dataframe_from_list([])
assert df.empty
def test_sma_calculation(self, indicators, sample_candles):
"""Test Simple Moving Average calculation."""
"""Test Simple Moving Average calculation (now returns DataFrame)."""
period = 5
results = indicators.sma(sample_candles, period)
# Should have results starting from period 5
assert len(results) == len(sample_candles) - period + 1
# Check first result
first_result = results[0]
assert isinstance(first_result, IndicatorResult)
assert first_result.symbol == 'BTC-USDT'
assert first_result.timeframe == '1m'
assert 'sma' in first_result.values
assert first_result.metadata['period'] == period
# Verify SMA calculation manually for first result
first_5_closes = [float(candle.close) for candle in sample_candles[:5]]
expected_sma = sum(first_5_closes) / len(first_5_closes)
assert abs(first_result.values['sma'] - expected_sma) < 0.001
df = indicators._prepare_dataframe_from_list(sample_candles)
df['timestamp'] = df.index
result_df = indicators.sma(df, period)
assert isinstance(result_df, pd.DataFrame)
assert not result_df.empty
assert 'sma' in result_df.columns
# Find the correct rolling window for the first SMA value
first_ts = result_df.index[0]
first_idx = [candle.end_time for candle in sample_candles].index(first_ts)
window_closes = [float(candle.close) for candle in sample_candles[first_idx - period + 1:first_idx + 1]]
expected_sma = sum(window_closes) / len(window_closes)
assert abs(result_df.iloc[0]['sma'] - expected_sma) < 0.001
def test_sma_insufficient_data(self, indicators, sample_candles):
"""Test SMA with insufficient data."""
"""Test SMA with insufficient data (now returns DataFrame)."""
period = 50 # More than available candles
results = indicators.sma(sample_candles, period)
assert len(results) == 0
df = indicators._prepare_dataframe_from_list(sample_candles)
df['timestamp'] = df.index
result_df = indicators.sma(df, period)
assert isinstance(result_df, pd.DataFrame)
assert result_df.empty
def test_ema_calculation(self, indicators, sample_candles):
"""Test Exponential Moving Average calculation."""
"""Test Exponential Moving Average calculation (now returns DataFrame)."""
period = 10
results = indicators.ema(sample_candles, period)
df = indicators._prepare_dataframe_from_list(sample_candles)
df['timestamp'] = df.index
result_df = indicators.ema(df, period)
# Should have results starting from period 10
assert len(results) == len(sample_candles) - period + 1
# Check first result
first_result = results[0]
assert isinstance(first_result, IndicatorResult)
assert 'ema' in first_result.values
assert first_result.metadata['period'] == period
assert isinstance(result_df, pd.DataFrame)
assert len(result_df) == len(sample_candles) - period + 1
assert 'ema' in result_df.columns
# EMA should be between the range of input prices
min_price = min(float(c.close) for c in sample_candles[:period])
max_price = max(float(c.close) for c in sample_candles[:period])
assert min_price <= first_result.values['ema'] <= max_price
assert min_price <= result_df.iloc[0]['ema'] <= max_price
def test_rsi_calculation(self, indicators, sample_candles):
"""Test Relative Strength Index calculation."""
"""Test Relative Strength Index calculation (now returns DataFrame)."""
period = 14
results = indicators.rsi(sample_candles, period)
# Should have results starting from period 15 (period + 1 for price change calculation)
assert len(results) == len(sample_candles) - period
# Check first result
first_result = results[0]
assert isinstance(first_result, IndicatorResult)
assert 'rsi' in first_result.values
assert 0 <= first_result.values['rsi'] <= 100 # RSI should be between 0 and 100
assert first_result.metadata['period'] == period
df = indicators._prepare_dataframe_from_list(sample_candles)
df['timestamp'] = df.index
result_df = indicators.rsi(df, period)
assert isinstance(result_df, pd.DataFrame)
assert not result_df.empty
assert 'rsi' in result_df.columns
assert 0 <= result_df.iloc[0]['rsi'] <= 100
def test_macd_calculation(self, indicators, sample_candles):
"""Test MACD calculation."""
"""Test MACD calculation (now returns DataFrame)."""
fast_period = 12
slow_period = 26
signal_period = 9
results = indicators.macd(sample_candles, fast_period, slow_period, signal_period)
# MACD needs slow_period + signal_period data points
expected_count = len(sample_candles) - slow_period - signal_period + 1
assert len(results) == max(0, expected_count)
if results: # Only test if we have results
first_result = results[0]
assert isinstance(first_result, IndicatorResult)
assert 'macd' in first_result.values
assert 'signal' in first_result.values
assert 'histogram' in first_result.values
df = indicators._prepare_dataframe_from_list(sample_candles)
df['timestamp'] = df.index
result_df = indicators.macd(df, fast_period, slow_period, signal_period)
# MACD results start after max(slow_period, signal_period) - 1 rows
min_required = max(slow_period, signal_period)
expected_count = max(0, len(sample_candles) - (min_required - 1))
assert isinstance(result_df, pd.DataFrame)
assert len(result_df) == expected_count
assert 'macd' in result_df.columns
assert 'signal' in result_df.columns
assert 'histogram' in result_df.columns
if not result_df.empty:
# Histogram should equal MACD - Signal
expected_histogram = first_result.values['macd'] - first_result.values['signal']
assert abs(first_result.values['histogram'] - expected_histogram) < 0.001
first_row = result_df.iloc[0]
expected_histogram = first_row['macd'] - first_row['signal']
assert abs(first_row['histogram'] - expected_histogram) < 0.001
def test_bollinger_bands_calculation(self, indicators, sample_candles):
"""Test Bollinger Bands calculation."""
"""Test Bollinger Bands calculation (now returns DataFrame)."""
period = 20
std_dev = 2.0
results = indicators.bollinger_bands(sample_candles, period, std_dev)
df = indicators._prepare_dataframe_from_list(sample_candles)
df['timestamp'] = df.index
result_df = indicators.bollinger_bands(df, period, std_dev)
# Should have results starting from period 20
assert len(results) == len(sample_candles) - period + 1
# Check first result
first_result = results[0]
assert isinstance(first_result, IndicatorResult)
assert 'upper_band' in first_result.values
assert 'middle_band' in first_result.values
assert 'lower_band' in first_result.values
assert 'bandwidth' in first_result.values
assert 'percent_b' in first_result.values
assert isinstance(result_df, pd.DataFrame)
assert len(result_df) == len(sample_candles) - period + 1
assert 'upper_band' in result_df.columns
assert 'middle_band' in result_df.columns
assert 'lower_band' in result_df.columns
# Upper band should be greater than middle band, which should be greater than lower band
assert first_result.values['upper_band'] > first_result.values['middle_band']
assert first_result.values['middle_band'] > first_result.values['lower_band']
first_row = result_df.iloc[0]
assert first_row['upper_band'] > first_row['middle_band']
assert first_row['middle_band'] > first_row['lower_band']
def test_sparse_data_handling(self, indicators, sparse_candles):
"""Test indicators with sparse data (time gaps)."""
period = 5
sma_results = indicators.sma(sparse_candles, period)
df = indicators._prepare_dataframe_from_list(sparse_candles)
df['timestamp'] = df.index
sma_df = indicators.sma(df, period)
# Should handle sparse data without issues
assert len(sma_results) > 0
assert not sma_df.empty
# Check that timestamps are preserved correctly
for result in sma_results:
assert result.timestamp is not None
assert isinstance(result.timestamp, datetime)
for ts in sma_df.index:
assert ts is not None
assert isinstance(ts, datetime)
def test_calculate_multiple_indicators(self, indicators, sample_candles):
"""Test calculating multiple indicators at once."""
@@ -236,16 +220,15 @@ class TestTechnicalIndicators:
'macd': {'type': 'macd'},
'bb_20': {'type': 'bollinger_bands', 'period': 20}
}
results = indicators.calculate_multiple_indicators(sample_candles, config)
df = indicators._prepare_dataframe_from_list(sample_candles)
df['timestamp'] = df.index
results = indicators.calculate_multiple_indicators(df, config)
assert len(results) == len(config)
assert 'sma_10' in results
assert 'ema_12' in results
assert 'rsi_14' in results
assert 'macd' in results
assert 'bb_20' in results
# Check that each indicator has appropriate results
assert len(results['sma_10']) > 0
assert len(results['ema_12']) > 0
@@ -255,21 +238,20 @@ class TestTechnicalIndicators:
config = {
'invalid_indicator': {'type': 'unknown_type', 'period': 10}
}
results = indicators.calculate_multiple_indicators(sample_candles, config)
df = indicators._prepare_dataframe_from_list(sample_candles)
results = indicators.calculate_multiple_indicators(df, config)
assert 'invalid_indicator' in results
assert len(results['invalid_indicator']) == 0 # Should return empty list
def test_different_price_columns(self, indicators, sample_candles):
"""Test indicators with different price columns."""
"""Test indicators with different price columns (now returns DataFrame)."""
df = indicators._prepare_dataframe_from_list(sample_candles)
# Test SMA with 'high' price column
sma_high = indicators.sma(sample_candles, 5, price_column='high')
sma_close = indicators.sma(sample_candles, 5, price_column='close')
sma_high = indicators.sma(df, 5, price_column='high')
sma_close = indicators.sma(df, 5, price_column='close')
assert len(sma_high) == len(sma_close)
# High prices should generally give higher SMA values
assert sma_high[0].values['sma'] >= sma_close[0].values['sma']
assert sma_high.iloc[0]['sma'] >= sma_close.iloc[0]['sma']
class TestIndicatorHelperFunctions:

View File

@@ -190,11 +190,9 @@ class TestTechnicalIndicatorsSafety:
"""Test indicators with sparse data (time gaps)."""
period = 5
df = indicators._prepare_dataframe_from_list(sparse_candles)
sma_results = indicators.sma(df, period)
assert len(sma_results) > 0
# Verify that gaps are preserved (no interpolation)
timestamps = [r.timestamp for r in sma_results]
sma_df = indicators.sma(df, period)
assert not sma_df.empty
timestamps = sma_df.index.to_list()
for i in range(1, len(timestamps)):
time_diff = timestamps[i] - timestamps[i-1]
assert time_diff >= timedelta(minutes=1)