Cycles/test/test_incremental_indicators.py
Vasily.onl bd6a0f05d7 Implement Incremental BBRS Strategy for Real-time Data Processing
- Introduced `BBRSIncrementalState` for real-time processing of the Bollinger Bands + RSI strategy, allowing minute-level data input and internal timeframe aggregation.
- Added `TimeframeAggregator` class to handle real-time data aggregation to higher timeframes (15min, 1h, etc.).
- Updated `README_BBRS.md` to document the new incremental strategy, including key features and usage examples.
- Created comprehensive tests to validate the incremental strategy against the original implementation, ensuring signal accuracy and performance consistency.
- Enhanced error handling and logging for better monitoring during real-time processing.
- Updated `TODO.md` to reflect the completion of the incremental BBRS strategy implementation.
2025-05-26 16:46:04 +08:00

358 lines
13 KiB
Python

"""
Test Incremental Indicators vs Original Implementations
This script validates that incremental indicators (Bollinger Bands, RSI) produce
identical results to the original batch implementations using real market data.
"""
import pandas as pd
import numpy as np
import logging
from datetime import datetime
import matplotlib.pyplot as plt
# Import original implementations
from cycles.Analysis.boillinger_band import BollingerBands
from cycles.Analysis.rsi import RSI
# Import incremental implementations
from cycles.IncStrategies.indicators.bollinger_bands import BollingerBandsState
from cycles.IncStrategies.indicators.rsi import RSIState
from cycles.IncStrategies.indicators.base import SimpleIndicatorState
# Import storage utility
from cycles.utils.storage import Storage
# Setup logging
# Configure the root logger once at import time: every record at INFO level
# or above is written both to a log file in the working directory and to the
# console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("test_incremental.log"),
        logging.StreamHandler(),
    ],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
class WildersRSIState(SimpleIndicatorState):
    """
    Incremental RSI that smooths gains and losses with Wilder's method.

    The smoothing factor is ``alpha = 1 / period`` (rather than the
    ``2 / (period + 1)`` of a standard EMA), which is intended to match the
    original batch RSI implementation.

    Attributes set by this class:
        alpha: Smoothing factor, ``1.0 / period``.
        avg_gain: Smoothed average gain, ``None`` until the second close.
        avg_loss: Smoothed average loss, ``None`` until the second close.
        previous_close: Last close seen, ``None`` before the first update.
    """

    def __init__(self, period: int = 14):
        """Create an empty state for the given look-back ``period``."""
        super().__init__(period)
        self.alpha = 1.0 / period  # Wilder's smoothing factor
        self.previous_close = None
        self.avg_gain = None
        self.avg_loss = None
        self.is_initialized = True

    def update(self, new_close: float) -> float:
        """
        Feed one closing price and return the current RSI.

        Args:
            new_close: Latest close; must be an ``int`` or ``float``.

        Returns:
            The RSI value. The very first call has no price change to work
            with and returns the neutral value 50.0.

        Raises:
            TypeError: If ``new_close`` is not an ``int`` or ``float``.
        """
        if not isinstance(new_close, (int, float)):
            raise TypeError(f"new_close must be numeric, got {type(new_close)}")
        # validate_input is not defined in this class; presumably inherited
        # from SimpleIndicatorState.
        self.validate_input(new_close)
        close = float(new_close)

        if self.previous_close is None:
            # First observation: only remember it, report a neutral RSI.
            self.previous_close = close
            self.values_received += 1
            self._current_value = 50.0
            return self._current_value

        # Split the price move into its non-negative gain and loss parts.
        delta = close - self.previous_close
        up_move = max(delta, 0.0)
        down_move = max(-delta, 0.0)

        if self.avg_gain is None:
            # Seed both averages with the first observed move.
            self.avg_gain, self.avg_loss = up_move, down_move
        else:
            # Wilder's smoothing: avg = alpha * new + (1 - alpha) * previous
            weight = self.alpha
            carry = 1 - weight
            self.avg_gain = weight * up_move + carry * self.avg_gain
            self.avg_loss = weight * down_move + carry * self.avg_loss

        if self.avg_loss != 0.0:
            relative_strength = self.avg_gain / self.avg_loss
            rsi = 100.0 - (100.0 / (1.0 + relative_strength))
        elif self.avg_gain > 0:
            # No losses at all: RSI saturates at its maximum.
            rsi = 100.0
        else:
            # Neither gains nor losses: report the neutral midpoint.
            rsi = 50.0

        # Persist state for the next update.
        self.previous_close = close
        self.values_received += 1
        self._current_value = rsi
        return rsi

    def is_warmed_up(self) -> bool:
        """Return True once at least ``period`` values have been received."""
        return self.values_received >= self.period

    def reset(self) -> None:
        """Clear the averages, last close, value counter and current value."""
        self.previous_close = None
        self.avg_gain = None
        self.avg_loss = None
        self.values_received = 0
        self._current_value = None
def load_test_data():
    """
    Load the 2023-2024 BTC/USD one-minute data set used by the tests.

    Returns:
        The object returned by ``Storage.load_data`` for the file
        ``btcusd_1-min_data.csv`` between 2023-01-01 and 2024-12-31, or
        ``None`` (after logging an error) when that object reports ``empty``.
    """
    loader = Storage(logging=logging)

    # Date window covering the 2023-2024 period.
    period_start, period_end = "2023-01-01", "2024-12-31"
    data = loader.load_data("btcusd_1-min_data.csv", period_start, period_end)

    if not data.empty:
        logging.info(f"Loaded {len(data)} rows of data from {data.index[0]} to {data.index[-1]}")
        return data

    logging.error("No data loaded for testing period")
    return None
def test_bollinger_bands(data, period=20, std_multiplier=2.0, tolerance=1e-10):
    """
    Compare incremental Bollinger Bands against the batch implementation.

    Args:
        data: Price data with a ``'close'`` column; a copy is handed to the
            batch calculator and the closes are fed one by one to the
            incremental state.
        period: Look-back period passed to both implementations.
        std_multiplier: Standard-deviation multiplier passed to both.
        tolerance: Largest absolute difference still reported as
            ``identical``. Defaults to the previously hard-coded ``1e-10``.

    Returns:
        Tuple ``(comparison_results, original_result, incremental_result)``.
        ``comparison_results`` maps each compared column name to a dict with
        ``'max_diff'``, ``'mean_diff'`` and ``'identical'``. Columns that are
        missing from the batch output, or that have no row where both sides
        are non-NaN, are left out and a warning is logged for them.
    """
    logging.info(f"Testing Bollinger Bands (period={period}, std_multiplier={std_multiplier})")

    # Original batch implementation - config structure it expects
    config = {
        "bb_period": period,
        "bb_width": 0.05,  # Required for market regime detection
        "trending": {
            "bb_std_dev_multiplier": std_multiplier
        },
        "sideways": {
            "bb_std_dev_multiplier": std_multiplier
        }
    }
    bb_calculator = BollingerBands(config=config)
    original_result = bb_calculator.calculate(data.copy())

    # Incremental implementation: output column -> key of the update() result
    column_keys = {
        'UpperBand': 'upper_band',
        'SMA': 'middle_band',
        'LowerBand': 'lower_band',
        'BBWidth': 'bandwidth',
    }
    bb_state = BollingerBandsState(period=period, std_dev_multiplier=std_multiplier)
    collected = {column: [] for column in column_keys}
    for close_price in data['close']:
        result = bb_state.update(close_price)
        for column, key in column_keys.items():
            collected[column].append(result[key])
    incremental_result = pd.DataFrame(collected, index=data.index)

    # Compare column by column
    comparison_results = {}
    for column in column_keys:
        if column not in original_result.columns:
            # Previously skipped silently, which could hide a renamed column.
            logging.warning(f"BB {column}: column missing from original result, comparison skipped")
            continue

        orig_series = original_result[column]
        inc_series = incremental_result[column]
        # Skip NaN values for comparison (warm-up period)
        valid_mask = ~(orig_series.isna() | inc_series.isna())
        if not valid_mask.any():
            logging.warning(f"BB {column}: no overlapping non-NaN values, comparison skipped")
            continue

        abs_diff = np.abs(orig_series[valid_mask] - inc_series[valid_mask])
        max_diff = abs_diff.max()
        mean_diff = abs_diff.mean()
        identical = max_diff < tolerance
        comparison_results[column] = {
            'max_diff': max_diff,
            'mean_diff': mean_diff,
            'identical': identical
        }
        logging.info(f"BB {column}: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={identical}")

    return comparison_results, original_result, incremental_result
def _summarize_rsi_difference(original_rsi, incremental_rsi, tolerance):
    """Return max/mean absolute difference stats, or None if no row has both values."""
    # Skip NaN values for comparison (warm-up period)
    valid_mask = ~(original_rsi.isna() | incremental_rsi.isna())
    if not valid_mask.any():
        return None
    abs_diff = np.abs(original_rsi[valid_mask] - incremental_rsi[valid_mask])
    max_diff = abs_diff.max()
    return {
        'max_diff': max_diff,
        'mean_diff': abs_diff.mean(),
        'identical': max_diff < tolerance
    }


def test_rsi(data, period=14, tolerance=1e-10):
    """
    Compare two incremental RSI variants against the batch implementation.

    Both ``RSIState`` (labelled "Standard EMA") and ``WildersRSIState``
    (Wilder's smoothing) are fed every close in ``data['close']`` and
    compared with the ``'RSI'`` column of the batch result.

    Args:
        data: Price data with a ``'close'`` column; a copy is handed to the
            batch calculator.
        period: RSI period passed to all three implementations.
        tolerance: Largest absolute difference still reported as
            ``identical``. Defaults to the previously hard-coded ``1e-10``.

    Returns:
        Tuple ``(comparison_results, original_result,
        incremental_result_wilders)``. ``comparison_results`` may contain the
        keys ``'RSI_Standard'`` and ``'RSI_Wilders'``, each mapping to a dict
        with ``'max_diff'``, ``'mean_diff'`` and ``'identical'``. A variant is
        left out (with a logged warning) when nothing could be compared.
    """
    logging.info(f"Testing RSI (period={period})")

    # Original batch implementation
    config = {"rsi_period": period}
    rsi_calculator = RSI(config=config)
    original_result = rsi_calculator.calculate(data.copy(), price_column='close')

    # Run both incremental variants over the same price stream
    rsi_state_standard = RSIState(period=period)
    rsi_state_wilders = WildersRSIState(period=period)
    standard_values = []
    wilders_values = []
    for close_price in data['close']:
        standard_values.append(rsi_state_standard.update(close_price))
        wilders_values.append(rsi_state_wilders.update(close_price))

    incremental_result_standard = pd.DataFrame({'RSI': standard_values}, index=data.index)
    incremental_result_wilders = pd.DataFrame({'RSI': wilders_values}, index=data.index)

    comparison_results = {}
    if 'RSI' not in original_result.columns:
        # Previously skipped silently, which could hide a renamed column.
        logging.warning("RSI: column missing from original result, comparison skipped")
        return comparison_results, original_result, incremental_result_wilders

    variants = (
        ('RSI_Standard', "RSI Standard EMA", incremental_result_standard),
        ('RSI_Wilders', "RSI Wilder's EMA", incremental_result_wilders),
    )
    for key, label, frame in variants:
        stats = _summarize_rsi_difference(original_result['RSI'], frame['RSI'], tolerance)
        if stats is None:
            logging.warning(f"{label}: no overlapping non-NaN values, comparison skipped")
            continue
        comparison_results[key] = stats
        logging.info(f"{label}: max_diff={stats['max_diff']:.2e}, mean_diff={stats['mean_diff']:.2e}, identical={stats['identical']}")

    return comparison_results, original_result, incremental_result_wilders
def plot_comparison(original, incremental, indicator_name, save_path=None):
    """
    Draw original vs incremental indicator values and their differences.

    The upper axes overlay both implementations, the lower axes show
    ``original - incremental``. Only the first 1000 rows (or fewer, if the
    data is shorter) are drawn.

    Args:
        original: Frame holding the batch results.
        incremental: Frame holding the incremental results, with the same
            column names as ``original``.
        indicator_name: ``"Bollinger Bands"`` or ``"RSI"``; any other value
            draws no series.
        save_path: Optional file path; when truthy the figure is saved there
            before being shown.
    """
    _, (value_ax, diff_ax) = plt.subplots(2, 1, figsize=(15, 10))

    # Plot first 1000 points for visibility
    n_points = min(1000, len(original))
    xs = range(n_points)
    head = slice(None, n_points)

    if indicator_name == "Bollinger Bands":
        # (column, colour code, label fragment) for each band
        band_specs = (
            ('UpperBand', 'b', 'Upper'),
            ('SMA', 'g', 'SMA'),
            ('LowerBand', 'r', 'Lower'),
        )
        for column, color, label in band_specs:
            value_ax.plot(xs, original[column].iloc[head], f'{color}-',
                          label=f'Original {label}', alpha=0.7)
        for column, color, label in band_specs:
            value_ax.plot(xs, incremental[column].iloc[head], f'{color}--',
                          label=f'Incremental {label}', alpha=0.7)
        # Differences between the two implementations
        for column, color, label in band_specs:
            gap = original[column] - incremental[column]
            diff_ax.plot(xs, gap.iloc[head], f'{color}-', label=f'{label} Diff')
    elif indicator_name == "RSI":
        value_ax.plot(xs, original['RSI'].iloc[head], 'b-', label='Original RSI', alpha=0.7)
        value_ax.plot(xs, incremental['RSI'].iloc[head], 'r--', label='Incremental RSI', alpha=0.7)
        # Differences between the two implementations
        rsi_gap = original['RSI'] - incremental['RSI']
        diff_ax.plot(xs, rsi_gap.iloc[head], 'g-', label='RSI Diff')

    # Shared decoration for both axes
    titled_axes = (
        (value_ax, f'{indicator_name} Comparison: Original vs Incremental'),
        (diff_ax, f'{indicator_name} Differences'),
    )
    for ax, title in titled_axes:
        ax.set_title(title)
        ax.legend()
        ax.grid(True)
    diff_ax.set_xlabel('Time Index')

    plt.tight_layout()
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        logging.info(f"Plot saved to {save_path}")
    plt.show()
def main():
    """
    Run the incremental-vs-batch validation and report the outcome.

    Loads the test data, runs the Bollinger Bands and RSI comparisons on the
    first 10,000 rows, logs a PASS/FAIL line per compared series, and then
    draws the comparison plots (saved as ``bb_comparison.png`` and
    ``rsi_comparison.png``). Returns early without testing when no data
    could be loaded.

    An indicator family whose comparison produced no entries is counted as a
    failure, so that "ALL TESTS PASSED" is never reported when nothing was
    actually validated.
    """
    logging.info("Starting incremental indicators validation test")

    # Load test data
    data = load_test_data()
    if data is None:
        return

    # Test with subset for faster execution during development
    test_data = data.iloc[:10000]  # First 10k rows for testing
    logging.info(f"Using {len(test_data)} rows for testing")

    # Test Bollinger Bands
    logging.info("=" * 50)
    bb_comparison, bb_original, bb_incremental = test_bollinger_bands(test_data)

    # Test RSI
    logging.info("=" * 50)
    rsi_comparison, rsi_original, rsi_incremental = test_rsi(test_data)

    # Summary
    logging.info("=" * 50)
    logging.info("VALIDATION SUMMARY:")
    all_identical = True
    families = (
        ("Bollinger Bands", bb_comparison),
        ("RSI", rsi_comparison),
    )
    for family, comparison in families:
        if not comparison:
            # An empty result means no series was compared at all; treating
            # that as success would be a vacuous pass.
            logging.warning(f"{family}: no comparisons were performed - nothing validated")
            all_identical = False
            continue
        for indicator, results in comparison.items():
            status = "PASS" if results['identical'] else "FAIL"
            logging.info(f"{family} {indicator}: {status}")
            if not results['identical']:
                all_identical = False

    if all_identical:
        logging.info("ALL TESTS PASSED - Incremental indicators are identical to original implementations!")
    else:
        logging.warning("Some tests failed - Check differences above")

    # Generate comparison plots
    plot_comparison(bb_original, bb_incremental, "Bollinger Bands", "bb_comparison.png")
    plot_comparison(rsi_original, rsi_incremental, "RSI", "rsi_comparison.png")
# Script entry point: run the validation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()