358 lines
13 KiB
Python
358 lines
13 KiB
Python
|
|
"""
|
||
|
|
Test Incremental Indicators vs Original Implementations
|
||
|
|
|
||
|
|
This script validates that incremental indicators (Bollinger Bands, RSI) produce
|
||
|
|
identical results to the original batch implementations using real market data.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pandas as pd
|
||
|
|
import numpy as np
|
||
|
|
import logging
|
||
|
|
from datetime import datetime
|
||
|
|
import matplotlib.pyplot as plt
|
||
|
|
|
||
|
|
# Import original implementations
|
||
|
|
from cycles.Analysis.boillinger_band import BollingerBands
|
||
|
|
from cycles.Analysis.rsi import RSI
|
||
|
|
|
||
|
|
# Import incremental implementations
|
||
|
|
from cycles.IncStrategies.indicators.bollinger_bands import BollingerBandsState
|
||
|
|
from cycles.IncStrategies.indicators.rsi import RSIState
|
||
|
|
from cycles.IncStrategies.indicators.base import SimpleIndicatorState
|
||
|
|
|
||
|
|
# Import storage utility
|
||
|
|
from cycles.utils.storage import Storage
|
||
|
|
|
||
|
|
# Configure the root logger once for the whole script: INFO and above is
# written both to "test_incremental.log" and to a default StreamHandler.
logging.basicConfig(
    handlers=[logging.FileHandler("test_incremental.log"), logging.StreamHandler()],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
|
||
|
|
|
||
|
|
class WildersRSIState(SimpleIndicatorState):
    """
    Incremental RSI whose average gain/loss use Wilder's smoothing.

    The smoothing factor is alpha = 1/period rather than the 2/(period+1) of a
    standard EMA; this variant exists to match the original implementation.
    """

    def __init__(self, period: int = 14):
        super().__init__(period)
        self.alpha = 1.0 / period  # Wilder's smoothing factor
        # Nothing has been observed yet, so there is no previous close and
        # no running averages.
        self.previous_close = None
        self.avg_gain = None
        self.avg_loss = None
        self.is_initialized = True

    def update(self, new_close: float) -> float:
        """
        Feed one closing price and return the resulting RSI value.

        The very first close has no predecessor to diff against, so 50.0 is
        returned for it. The first observed price change seeds the running
        averages directly; every later change is blended in with ``alpha``.

        Raises:
            TypeError: if ``new_close`` is not an ``int`` or ``float``.
        """
        if not isinstance(new_close, (int, float)):
            raise TypeError(f"new_close must be numeric, got {type(new_close)}")

        self.validate_input(new_close)
        close = float(new_close)

        last_close = self.previous_close
        if last_close is None:
            # First value - no gain/loss to calculate
            rsi = 50.0
        else:
            delta = close - last_close
            up_move = max(delta, 0.0)
            down_move = max(-delta, 0.0)

            if self.avg_gain is None:
                # Seed the averages with the first gain/loss.
                self.avg_gain, self.avg_loss = up_move, down_move
            else:
                # Wilder's smoothing: avg = alpha * new_value + (1 - alpha) * previous_avg
                keep = 1 - self.alpha
                self.avg_gain = self.alpha * up_move + keep * self.avg_gain
                self.avg_loss = self.alpha * down_move + keep * self.avg_loss

            if self.avg_loss == 0.0:
                # No losses at all: 100 when there were gains, otherwise 50.
                rsi = 100.0 if self.avg_gain > 0 else 50.0
            else:
                rs = self.avg_gain / self.avg_loss
                rsi = 100.0 - (100.0 / (1.0 + rs))

        # Record the new observation and the value just produced.
        self.previous_close = close
        self.values_received += 1
        self._current_value = rsi
        return rsi

    def is_warmed_up(self) -> bool:
        """Return True once at least ``period`` values have been received."""
        return self.values_received >= self.period

    def reset(self) -> None:
        """Clear the previous close, running averages, counter and current value."""
        self.previous_close = None
        self.avg_gain = None
        self.avg_loss = None
        self.values_received = 0
        self._current_value = None
|
||
|
|
|
||
|
|
def load_test_data():
    """
    Load 2023-2024 BTC data for testing.

    Reads "btcusd_1-min_data.csv" through ``Storage.load_data`` for the range
    2023-01-01 .. 2024-12-31.

    Returns:
        The loaded data, or ``None`` (after logging an error) when the loaded
        frame is empty.
    """
    date_range = ("2023-01-01", "2024-12-31")
    frame = Storage(logging=logging).load_data("btcusd_1-min_data.csv", *date_range)

    if not frame.empty:
        logging.info(f"Loaded {len(frame)} rows of data from {frame.index[0]} to {frame.index[-1]}")
        return frame

    logging.error("No data loaded for testing period")
    return None
|
||
|
|
|
||
|
|
def test_bollinger_bands(data, period=20, std_multiplier=2.0):
    """
    Test Bollinger Bands: incremental vs batch implementation.

    Runs the batch ``BollingerBands`` calculator on a copy of ``data`` and
    feeds ``data['close']`` one value at a time into ``BollingerBandsState``,
    then compares the 'UpperBand', 'SMA', 'LowerBand' and 'BBWidth' columns on
    the rows where both sides are non-NaN.

    Args:
        data: frame with a 'close' column; its index is reused for the
            incremental result.
        period: Bollinger Bands period passed to both implementations.
        std_multiplier: standard-deviation multiplier passed to both
            implementations.

    Returns:
        Tuple ``(comparison_results, original_result, incremental_result)``.
        ``comparison_results`` maps each compared column name to a dict with
        'max_diff', 'mean_diff' and 'identical' (``max_diff < 1e-10``).
        A column that could not be compared is absent from the dict and a
        warning is logged for it, so a skipped column is never silent.
    """
    logging.info(f"Testing Bollinger Bands (period={period}, std_multiplier={std_multiplier})")

    # Original batch implementation - the config needs both regime sections.
    config = {
        "bb_period": period,
        "bb_width": 0.05,  # Required for market regime detection
        "trending": {"bb_std_dev_multiplier": std_multiplier},
        "sideways": {"bb_std_dev_multiplier": std_multiplier},
    }
    original_result = BollingerBands(config=config).calculate(data.copy())

    # Incremental implementation: one update per close, in order.
    bb_state = BollingerBandsState(period=period, std_dev_multiplier=std_multiplier)
    updates = [bb_state.update(close_price) for close_price in data['close']]

    incremental_result = pd.DataFrame(
        {
            'UpperBand': [step['upper_band'] for step in updates],
            'SMA': [step['middle_band'] for step in updates],
            'LowerBand': [step['lower_band'] for step in updates],
            'BBWidth': [step['bandwidth'] for step in updates],
        },
        index=data.index,
    )

    comparison_results = {}
    for column in ('UpperBand', 'SMA', 'LowerBand', 'BBWidth'):
        if column not in original_result.columns:
            logging.warning(f"BB {column}: column missing from original result, comparison skipped")
            continue

        # Skip NaN values for comparison (warm-up period)
        valid_mask = ~(original_result[column].isna() | incremental_result[column].isna())
        if not valid_mask.any():
            logging.warning(f"BB {column}: no overlapping non-NaN values, comparison skipped")
            continue

        abs_diff = np.abs(original_result[column][valid_mask] - incremental_result[column][valid_mask])
        max_diff = abs_diff.max()
        mean_diff = abs_diff.mean()
        identical = max_diff < 1e-10

        comparison_results[column] = {
            'max_diff': max_diff,
            'mean_diff': mean_diff,
            'identical': identical,
        }
        logging.info(f"BB {column}: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={identical}")

    return comparison_results, original_result, incremental_result
|
||
|
|
|
||
|
|
def _compare_rsi_series(original_rsi, incremental_rsi, log_label):
    """Compare two RSI series on rows where both are non-NaN.

    Returns a dict with 'max_diff', 'mean_diff' and 'identical'
    (``max_diff < 1e-10``), or ``None`` (after logging a warning) when there
    is no row on which both series have a value.
    """
    valid_mask = ~(original_rsi.isna() | incremental_rsi.isna())
    if not valid_mask.any():
        logging.warning(f"{log_label}: no overlapping non-NaN values, comparison skipped")
        return None

    abs_diff = np.abs(original_rsi[valid_mask] - incremental_rsi[valid_mask])
    max_diff = abs_diff.max()
    mean_diff = abs_diff.mean()
    identical = max_diff < 1e-10

    logging.info(f"{log_label}: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={identical}")
    return {'max_diff': max_diff, 'mean_diff': mean_diff, 'identical': identical}


def test_rsi(data, period=14):
    """
    Test RSI: incremental vs batch implementation.

    Runs the batch ``RSI`` calculator on a copy of ``data`` and feeds
    ``data['close']`` one value at a time into both ``RSIState`` (standard
    EMA) and ``WildersRSIState`` (Wilder's smoothing), then compares each
    incremental series with the batch 'RSI' column.

    Args:
        data: frame with a 'close' column; its index is reused for the
            incremental results.
        period: RSI period passed to all three implementations.

    Returns:
        Tuple ``(comparison_results, original_result, incremental_result_wilders)``.
        ``comparison_results`` may contain 'RSI_Standard' and 'RSI_Wilders',
        each a dict with 'max_diff', 'mean_diff' and 'identical'. A comparison
        that could not be performed is absent and a warning is logged for it.
    """
    logging.info(f"Testing RSI (period={period})")

    # Original batch implementation
    rsi_calculator = RSI(config={"rsi_period": period})
    original_result = rsi_calculator.calculate(data.copy(), price_column='close')

    # Test both standard EMA and Wilder's smoothing
    rsi_state_standard = RSIState(period=period)
    rsi_state_wilders = WildersRSIState(period=period)

    incremental_rsi_standard = []
    incremental_rsi_wilders = []
    for close_price in data['close']:
        incremental_rsi_standard.append(rsi_state_standard.update(close_price))
        incremental_rsi_wilders.append(rsi_state_wilders.update(close_price))

    incremental_result_standard = pd.DataFrame({'RSI': incremental_rsi_standard}, index=data.index)
    incremental_result_wilders = pd.DataFrame({'RSI': incremental_rsi_wilders}, index=data.index)

    comparison_results = {}
    if 'RSI' not in original_result.columns:
        # Without the batch column nothing can be validated; say so explicitly.
        logging.warning("RSI: column missing from original result, comparison skipped")
        return comparison_results, original_result, incremental_result_wilders

    variants = (
        ('RSI_Standard', "RSI Standard EMA", incremental_result_standard),
        ('RSI_Wilders', "RSI Wilder's EMA", incremental_result_wilders),
    )
    for result_key, log_label, incremental_frame in variants:
        stats = _compare_rsi_series(original_result['RSI'], incremental_frame['RSI'], log_label)
        if stats is not None:
            comparison_results[result_key] = stats

    return comparison_results, original_result, incremental_result_wilders
|
||
|
|
|
||
|
|
def plot_comparison(original, incremental, indicator_name, save_path=None):
    """
    Plot comparison between original and incremental implementations.

    Draws two stacked axes: the top one overlays the original (solid) and
    incremental (dashed) series, the bottom one shows original minus
    incremental. Only the first 1000 points (or fewer, if ``original`` is
    shorter) are drawn.

    Args:
        original: frame holding the batch results.
        incremental: frame holding the incremental results.
        indicator_name: "Bollinger Bands" or "RSI"; selects which columns are
            drawn. Any other value leaves both axes without series.
        save_path: when truthy, the figure is also saved to this path.
    """
    _, (top, bottom) = plt.subplots(2, 1, figsize=(15, 10))

    # Plot first 1000 points for visibility
    n_points = min(1000, len(original))
    xs = range(n_points)

    # Each entry is (column, matplotlib format string, legend label).
    if indicator_name == "Bollinger Bands":
        original_lines = [
            ('UpperBand', 'b-', 'Original Upper'),
            ('SMA', 'g-', 'Original SMA'),
            ('LowerBand', 'r-', 'Original Lower'),
        ]
        incremental_lines = [
            ('UpperBand', 'b--', 'Incremental Upper'),
            ('SMA', 'g--', 'Incremental SMA'),
            ('LowerBand', 'r--', 'Incremental Lower'),
        ]
        diff_lines = [
            ('UpperBand', 'b-', 'Upper Diff'),
            ('SMA', 'g-', 'SMA Diff'),
            ('LowerBand', 'r-', 'Lower Diff'),
        ]
    elif indicator_name == "RSI":
        original_lines = [('RSI', 'b-', 'Original RSI')]
        incremental_lines = [('RSI', 'r--', 'Incremental RSI')]
        diff_lines = [('RSI', 'g-', 'RSI Diff')]
    else:
        original_lines = incremental_lines = diff_lines = []

    for column, fmt, label in original_lines:
        top.plot(xs, original[column].iloc[:n_points], fmt, label=label, alpha=0.7)
    for column, fmt, label in incremental_lines:
        top.plot(xs, incremental[column].iloc[:n_points], fmt, label=label, alpha=0.7)
    for column, fmt, label in diff_lines:
        bottom.plot(xs, (original[column] - incremental[column]).iloc[:n_points], fmt, label=label)

    top.set_title(f'{indicator_name} Comparison: Original vs Incremental')
    top.legend()
    top.grid(True)

    bottom.set_title(f'{indicator_name} Differences')
    bottom.legend()
    bottom.grid(True)
    bottom.set_xlabel('Time Index')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        logging.info(f"Plot saved to {save_path}")

    plt.show()
|
||
|
|
|
||
|
|
def main():
    """
    Main test function.

    Loads the test data, runs the Bollinger Bands and RSI comparisons on the
    first 10,000 rows, logs a PASS/FAIL line per compared series, and finally
    produces the comparison plots ("bb_comparison.png", "rsi_comparison.png").

    The overall result is reported as passed only when every compared series
    is identical AND each indicator family produced at least one comparison;
    an empty comparison dict means nothing was validated and is treated as a
    failure rather than a silent pass.

    Returns early (without plots) when no data could be loaded.
    """
    logging.info("Starting incremental indicators validation test")

    # Load test data
    data = load_test_data()
    if data is None:
        return

    # Test with subset for faster execution during development
    test_data = data.iloc[:10000]  # First 10k rows for testing
    logging.info(f"Using {len(test_data)} rows for testing")

    # Test Bollinger Bands
    logging.info("=" * 50)
    bb_comparison, bb_original, bb_incremental = test_bollinger_bands(test_data)

    # Test RSI
    logging.info("=" * 50)
    rsi_comparison, rsi_original, rsi_incremental = test_rsi(test_data)

    # Summary
    logging.info("=" * 50)
    logging.info("VALIDATION SUMMARY:")

    all_identical = True
    for family, comparison in (("Bollinger Bands", bb_comparison), ("RSI", rsi_comparison)):
        if not comparison:
            # Nothing was compared for this family, so it cannot count as a pass.
            logging.warning(f"{family}: no comparisons were performed")
            all_identical = False
            continue

        for indicator, results in comparison.items():
            passed = bool(results['identical'])
            status = "PASS" if passed else "FAIL"
            logging.info(f"{family} {indicator}: {status}")
            if not passed:
                all_identical = False

    if all_identical:
        logging.info("ALL TESTS PASSED - Incremental indicators are identical to original implementations!")
    else:
        logging.warning("Some tests failed - Check differences above")

    # Generate comparison plots
    plot_comparison(bb_original, bb_incremental, "Bollinger Bands", "bb_comparison.png")
    plot_comparison(rsi_original, rsi_incremental, "RSI", "rsi_comparison.png")
||
|
|
|
||
|
|
# Run the validation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|