Implement Incremental BBRS Strategy for Real-time Data Processing

- Introduced `BBRSIncrementalState` for real-time processing of the Bollinger Bands + RSI strategy, allowing minute-level data input and internal timeframe aggregation.
- Added `TimeframeAggregator` class to handle real-time data aggregation to higher timeframes (15min, 1h, etc.).
- Updated `README_BBRS.md` to document the new incremental strategy, including key features and usage examples.
- Created comprehensive tests to validate the incremental strategy against the original implementation, ensuring signal accuracy and performance consistency.
- Enhanced error handling and logging for better monitoring during real-time processing.
- Updated `TODO.md` to reflect the completion of the incremental BBRS strategy implementation.
This commit is contained in:
Vasily.onl
2025-05-26 16:46:04 +08:00
parent ba78539cbb
commit bd6a0f05d7
10 changed files with 2239 additions and 62 deletions

View File

@@ -0,0 +1,112 @@
"""
Debug RSI Differences
This script performs a detailed analysis of RSI calculation differences
between the original and incremental implementations.
"""
import pandas as pd
import numpy as np
import logging
from cycles.Analysis.rsi import RSI
from cycles.utils.storage import Storage
# Setup logging
logging.basicConfig(level=logging.INFO)
def debug_rsi_calculation():
"""Debug RSI calculation step by step."""
# Load small sample of data
storage = Storage(logging=logging)
data = storage.load_data("btcusd_1-min_data.csv", "2023-01-01", "2023-01-02")
# Take first 50 rows for detailed analysis
test_data = data.iloc[:50].copy()
print(f"Analyzing {len(test_data)} data points")
print(f"Price range: {test_data['close'].min():.2f} - {test_data['close'].max():.2f}")
# Original implementation
config = {"rsi_period": 14}
rsi_calculator = RSI(config=config)
original_result = rsi_calculator.calculate(test_data.copy(), price_column='close')
# Manual step-by-step calculation to understand the original
prices = test_data['close'].values
period = 14
print("\nStep-by-step manual calculation:")
print("Index | Price | Delta | Gain | Loss | AvgGain | AvgLoss | RS | RSI_Manual | RSI_Original")
print("-" * 100)
deltas = np.diff(prices)
gains = np.where(deltas > 0, deltas, 0)
losses = np.where(deltas < 0, -deltas, 0)
# Calculate using pandas EMA with Wilder's smoothing
gain_series = pd.Series(gains, index=test_data.index[1:])
loss_series = pd.Series(losses, index=test_data.index[1:])
# Wilder's smoothing: alpha = 1/period, adjust=False
avg_gain = gain_series.ewm(alpha=1/period, adjust=False, min_periods=period).mean()
avg_loss = loss_series.ewm(alpha=1/period, adjust=False, min_periods=period).mean()
rs_manual = avg_gain / avg_loss.replace(0, 1e-9)
rsi_manual = 100 - (100 / (1 + rs_manual))
# Handle edge cases
rsi_manual[avg_loss == 0] = np.where(avg_gain[avg_loss == 0] > 0, 100, 50)
rsi_manual[avg_gain.isna() | avg_loss.isna()] = np.nan
# Compare with original
for i in range(min(30, len(test_data))):
price = prices[i]
if i == 0:
print(f"{i:5d} | {price:7.2f} | - | - | - | - | - | - | - | -")
else:
delta = deltas[i-1]
gain = gains[i-1]
loss = losses[i-1]
# Get values from series (may be NaN)
avg_g = avg_gain.iloc[i-1] if i-1 < len(avg_gain) else np.nan
avg_l = avg_loss.iloc[i-1] if i-1 < len(avg_loss) else np.nan
rs_val = rs_manual.iloc[i-1] if i-1 < len(rs_manual) else np.nan
rsi_man = rsi_manual.iloc[i-1] if i-1 < len(rsi_manual) else np.nan
# Get original RSI
rsi_orig = original_result['RSI'].iloc[i] if 'RSI' in original_result.columns else np.nan
print(f"{i:5d} | {price:7.2f} | {delta:5.2f} | {gain:4.2f} | {loss:4.2f} | {avg_g:7.4f} | {avg_l:7.4f} | {rs_val:2.1f} | {rsi_man:10.4f} | {rsi_orig:10.4f}")
# Now test incremental implementation
print("\n" + "="*80)
print("INCREMENTAL IMPLEMENTATION TEST")
print("="*80)
# Test incremental
from cycles.IncStrategies.indicators.rsi import RSIState
debug_rsi = RSIState(period=14)
incremental_results = []
print("\nTesting corrected incremental RSI:")
for i, price in enumerate(prices[:20]): # First 20 values
rsi_val = debug_rsi.update(price)
incremental_results.append(rsi_val)
print(f"Step {i+1}: price={price:.2f}, RSI={rsi_val:.4f}")
print("\nComparison of first 20 values:")
print("Index | Original RSI | Incremental RSI | Difference")
print("-" * 50)
for i in range(min(20, len(original_result))):
orig_rsi = original_result['RSI'].iloc[i] if 'RSI' in original_result.columns else np.nan
inc_rsi = incremental_results[i] if i < len(incremental_results) else np.nan
diff = abs(orig_rsi - inc_rsi) if not (np.isnan(orig_rsi) or np.isnan(inc_rsi)) else np.nan
print(f"{i:5d} | {orig_rsi:11.4f} | {inc_rsi:14.4f} | {diff:10.4f}")
if __name__ == "__main__":
debug_rsi_calculation()

View File

@@ -0,0 +1,289 @@
"""
Test Incremental BBRS Strategy vs Original Implementation
This script validates that the incremental BBRS strategy produces
equivalent results to the original batch implementation.
"""
import pandas as pd
import numpy as np
import logging
from datetime import datetime
import matplotlib.pyplot as plt
# Import original implementation
from cycles.Analysis.bb_rsi import BollingerBandsStrategy
# Import incremental implementation
from cycles.IncStrategies.bbrs_incremental import BBRSIncrementalState
# Import storage utility
from cycles.utils.storage import Storage
# Import aggregation function to match original behavior
from cycles.utils.data_utils import aggregate_to_minutes
# Setup logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler("test_bbrs_incremental.log"),
logging.StreamHandler()
]
)
def load_test_data():
"""Load 2023-2024 BTC data for testing."""
storage = Storage(logging=logging)
# Load data for testing period
start_date = "2023-01-01"
end_date = "2023-01-07" # One week for faster testing
data = storage.load_data("btcusd_1-min_data.csv", start_date, end_date)
if data.empty:
logging.error("No data loaded for testing period")
return None
logging.info(f"Loaded {len(data)} rows of data from {data.index[0]} to {data.index[-1]}")
return data
def test_bbrs_strategy_comparison():
"""Test incremental BBRS vs original implementation."""
# Load test data
data = load_test_data()
if data is None:
return
# Use subset for testing
test_data = data.copy() # First 5000 rows
logging.info(f"Using {len(test_data)} rows for testing")
# Aggregate to hourly to match original strategy
hourly_data = data = aggregate_to_minutes(data, 15)
# hourly_data = test_data.copy()
logging.info(f"Aggregated to {len(hourly_data)} hourly data points")
# Configuration
config = {
"bb_width": 0.05,
"bb_period": 20,
"rsi_period": 14,
"trending": {
"rsi_threshold": [30, 70],
"bb_std_dev_multiplier": 2.5,
},
"sideways": {
"rsi_threshold": [40, 60],
"bb_std_dev_multiplier": 1.8,
},
"strategy_name": "MarketRegimeStrategy",
"SqueezeStrategy": True
}
logging.info("Testing original BBRS implementation...")
# Original implementation (already aggregates internally)
original_strategy = BollingerBandsStrategy(config=config, logging=logging)
original_result = original_strategy.run(test_data.copy(), "MarketRegimeStrategy")
logging.info("Testing incremental BBRS implementation...")
# Incremental implementation (use pre-aggregated data)
incremental_strategy = BBRSIncrementalState(config)
incremental_results = []
# Process hourly data incrementally
for i, (timestamp, row) in enumerate(hourly_data.iterrows()):
ohlcv_data = {
'open': row['open'],
'high': row['high'],
'low': row['low'],
'close': row['close'],
'volume': row['volume']
}
result = incremental_strategy.update(ohlcv_data)
result['timestamp'] = timestamp
incremental_results.append(result)
if i % 50 == 0: # Log every 50 hourly points
logging.info(f"Processed {i+1}/{len(hourly_data)} hourly data points")
# Convert incremental results to DataFrame
incremental_df = pd.DataFrame(incremental_results)
incremental_df.set_index('timestamp', inplace=True)
logging.info("Comparing results...")
# Compare key metrics after warm-up period
warmup_period = max(config["bb_period"], config["rsi_period"]) + 20 # Add volume MA period
if len(original_result) > warmup_period and len(incremental_df) > warmup_period:
# Compare after warm-up
orig_warmed = original_result.iloc[warmup_period:]
inc_warmed = incremental_df.iloc[warmup_period:]
# Align indices
common_index = orig_warmed.index.intersection(inc_warmed.index)
orig_aligned = orig_warmed.loc[common_index]
inc_aligned = inc_warmed.loc[common_index]
logging.info(f"Comparing {len(common_index)} aligned data points after warm-up")
# Compare signals
if 'BuySignal' in orig_aligned.columns and 'buy_signal' in inc_aligned.columns:
buy_signal_match = (orig_aligned['BuySignal'] == inc_aligned['buy_signal']).mean()
logging.info(f"Buy signal match rate: {buy_signal_match:.4f} ({buy_signal_match*100:.2f}%)")
buy_signals_orig = orig_aligned['BuySignal'].sum()
buy_signals_inc = inc_aligned['buy_signal'].sum()
logging.info(f"Buy signals - Original: {buy_signals_orig}, Incremental: {buy_signals_inc}")
if 'SellSignal' in orig_aligned.columns and 'sell_signal' in inc_aligned.columns:
sell_signal_match = (orig_aligned['SellSignal'] == inc_aligned['sell_signal']).mean()
logging.info(f"Sell signal match rate: {sell_signal_match:.4f} ({sell_signal_match*100:.2f}%)")
sell_signals_orig = orig_aligned['SellSignal'].sum()
sell_signals_inc = inc_aligned['sell_signal'].sum()
logging.info(f"Sell signals - Original: {sell_signals_orig}, Incremental: {sell_signals_inc}")
# Compare RSI values
if 'RSI' in orig_aligned.columns and 'rsi' in inc_aligned.columns:
# Filter out NaN values
valid_mask = ~(orig_aligned['RSI'].isna() | inc_aligned['rsi'].isna())
if valid_mask.sum() > 0:
rsi_orig = orig_aligned['RSI'][valid_mask]
rsi_inc = inc_aligned['rsi'][valid_mask]
rsi_diff = np.abs(rsi_orig - rsi_inc)
rsi_max_diff = rsi_diff.max()
rsi_mean_diff = rsi_diff.mean()
logging.info(f"RSI comparison - Max diff: {rsi_max_diff:.6f}, Mean diff: {rsi_mean_diff:.6f}")
# Compare Bollinger Bands
bb_comparisons = [
('UpperBand', 'upper_band'),
('LowerBand', 'lower_band'),
('SMA', 'middle_band')
]
for orig_col, inc_col in bb_comparisons:
if orig_col in orig_aligned.columns and inc_col in inc_aligned.columns:
valid_mask = ~(orig_aligned[orig_col].isna() | inc_aligned[inc_col].isna())
if valid_mask.sum() > 0:
orig_vals = orig_aligned[orig_col][valid_mask]
inc_vals = inc_aligned[inc_col][valid_mask]
diff = np.abs(orig_vals - inc_vals)
max_diff = diff.max()
mean_diff = diff.mean()
logging.info(f"{orig_col} comparison - Max diff: {max_diff:.6f}, Mean diff: {mean_diff:.6f}")
# Plot comparison for visual inspection
plot_comparison(orig_aligned, inc_aligned)
else:
logging.warning("Not enough data after warm-up period for comparison")
def plot_comparison(original_df, incremental_df, save_path="bbrs_strategy_comparison.png"):
"""Plot comparison between original and incremental BBRS strategies."""
# Plot first 1000 points for visibility
plot_points = min(1000, len(original_df), len(incremental_df))
fig, axes = plt.subplots(4, 1, figsize=(15, 12))
x_range = range(plot_points)
# Plot 1: Price and Bollinger Bands
if all(col in original_df.columns for col in ['close', 'UpperBand', 'LowerBand', 'SMA']):
axes[0].plot(x_range, original_df['close'].iloc[:plot_points], 'k-', label='Price', alpha=0.7)
axes[0].plot(x_range, original_df['UpperBand'].iloc[:plot_points], 'b-', label='Original Upper BB', alpha=0.7)
axes[0].plot(x_range, original_df['SMA'].iloc[:plot_points], 'g-', label='Original SMA', alpha=0.7)
axes[0].plot(x_range, original_df['LowerBand'].iloc[:plot_points], 'r-', label='Original Lower BB', alpha=0.7)
if all(col in incremental_df.columns for col in ['upper_band', 'lower_band', 'middle_band']):
axes[0].plot(x_range, incremental_df['upper_band'].iloc[:plot_points], 'b--', label='Incremental Upper BB', alpha=0.7)
axes[0].plot(x_range, incremental_df['middle_band'].iloc[:plot_points], 'g--', label='Incremental SMA', alpha=0.7)
axes[0].plot(x_range, incremental_df['lower_band'].iloc[:plot_points], 'r--', label='Incremental Lower BB', alpha=0.7)
axes[0].set_title('Bollinger Bands Comparison')
axes[0].legend()
axes[0].grid(True)
# Plot 2: RSI
if 'RSI' in original_df.columns and 'rsi' in incremental_df.columns:
axes[1].plot(x_range, original_df['RSI'].iloc[:plot_points], 'b-', label='Original RSI', alpha=0.7)
axes[1].plot(x_range, incremental_df['rsi'].iloc[:plot_points], 'r--', label='Incremental RSI', alpha=0.7)
axes[1].axhline(y=70, color='gray', linestyle=':', alpha=0.5)
axes[1].axhline(y=30, color='gray', linestyle=':', alpha=0.5)
axes[1].set_title('RSI Comparison')
axes[1].legend()
axes[1].grid(True)
# Plot 3: Buy/Sell Signals
if 'BuySignal' in original_df.columns and 'buy_signal' in incremental_df.columns:
buy_orig = original_df['BuySignal'].iloc[:plot_points]
buy_inc = incremental_df['buy_signal'].iloc[:plot_points]
# Plot as scatter points where signals occur
buy_orig_idx = [i for i, val in enumerate(buy_orig) if val]
buy_inc_idx = [i for i, val in enumerate(buy_inc) if val]
axes[2].scatter(buy_orig_idx, [1]*len(buy_orig_idx), color='green', marker='^',
label='Original Buy', alpha=0.7, s=30)
axes[2].scatter(buy_inc_idx, [0.8]*len(buy_inc_idx), color='blue', marker='^',
label='Incremental Buy', alpha=0.7, s=30)
if 'SellSignal' in original_df.columns and 'sell_signal' in incremental_df.columns:
sell_orig = original_df['SellSignal'].iloc[:plot_points]
sell_inc = incremental_df['sell_signal'].iloc[:plot_points]
sell_orig_idx = [i for i, val in enumerate(sell_orig) if val]
sell_inc_idx = [i for i, val in enumerate(sell_inc) if val]
axes[2].scatter(sell_orig_idx, [0.6]*len(sell_orig_idx), color='red', marker='v',
label='Original Sell', alpha=0.7, s=30)
axes[2].scatter(sell_inc_idx, [0.4]*len(sell_inc_idx), color='orange', marker='v',
label='Incremental Sell', alpha=0.7, s=30)
axes[2].set_title('Trading Signals Comparison')
axes[2].legend()
axes[2].grid(True)
axes[2].set_ylim(0, 1.2)
# Plot 4: Market Regime
if 'market_regime' in incremental_df.columns:
regime_numeric = [1 if regime == 'sideways' else 0 for regime in incremental_df['market_regime'].iloc[:plot_points]]
axes[3].plot(x_range, regime_numeric, 'purple', label='Market Regime (1=Sideways, 0=Trending)', alpha=0.7)
axes[3].set_title('Market Regime Detection')
axes[3].legend()
axes[3].grid(True)
axes[3].set_xlabel('Time Index')
plt.tight_layout()
plt.savefig(save_path, dpi=300, bbox_inches='tight')
logging.info(f"Comparison plot saved to {save_path}")
plt.show()
def main():
"""Main test function."""
logging.info("Starting BBRS incremental strategy validation test")
try:
test_bbrs_strategy_comparison()
logging.info("BBRS incremental strategy test completed successfully!")
except Exception as e:
logging.error(f"Test failed with error: {e}")
raise
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,358 @@
"""
Test Incremental Indicators vs Original Implementations
This script validates that incremental indicators (Bollinger Bands, RSI) produce
identical results to the original batch implementations using real market data.
"""
import pandas as pd
import numpy as np
import logging
from datetime import datetime
import matplotlib.pyplot as plt
# Import original implementations
from cycles.Analysis.boillinger_band import BollingerBands
from cycles.Analysis.rsi import RSI
# Import incremental implementations
from cycles.IncStrategies.indicators.bollinger_bands import BollingerBandsState
from cycles.IncStrategies.indicators.rsi import RSIState
from cycles.IncStrategies.indicators.base import SimpleIndicatorState
# Import storage utility
from cycles.utils.storage import Storage
# Setup logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler("test_incremental.log"),
logging.StreamHandler()
]
)
class WildersRSIState(SimpleIndicatorState):
"""
RSI implementation using Wilder's smoothing to match the original implementation.
Wilder's smoothing uses alpha = 1/period instead of 2/(period+1).
"""
def __init__(self, period: int = 14):
super().__init__(period)
self.alpha = 1.0 / period # Wilder's smoothing factor
self.avg_gain = None
self.avg_loss = None
self.previous_close = None
self.is_initialized = True
def update(self, new_close: float) -> float:
"""Update RSI with Wilder's smoothing."""
if not isinstance(new_close, (int, float)):
raise TypeError(f"new_close must be numeric, got {type(new_close)}")
self.validate_input(new_close)
new_close = float(new_close)
if self.previous_close is None:
# First value - no gain/loss to calculate
self.previous_close = new_close
self.values_received += 1
self._current_value = 50.0
return self._current_value
# Calculate price change
price_change = new_close - self.previous_close
gain = max(price_change, 0.0)
loss = max(-price_change, 0.0)
if self.avg_gain is None:
# Initialize with first gain/loss
self.avg_gain = gain
self.avg_loss = loss
else:
# Wilder's smoothing: avg = alpha * new_value + (1 - alpha) * previous_avg
self.avg_gain = self.alpha * gain + (1 - self.alpha) * self.avg_gain
self.avg_loss = self.alpha * loss + (1 - self.alpha) * self.avg_loss
# Calculate RSI
if self.avg_loss == 0.0:
rsi_value = 100.0 if self.avg_gain > 0 else 50.0
else:
rs = self.avg_gain / self.avg_loss
rsi_value = 100.0 - (100.0 / (1.0 + rs))
# Store state
self.previous_close = new_close
self.values_received += 1
self._current_value = rsi_value
return rsi_value
def is_warmed_up(self) -> bool:
"""Check if RSI is warmed up."""
return self.values_received >= self.period
def reset(self) -> None:
"""Reset RSI state."""
self.avg_gain = None
self.avg_loss = None
self.previous_close = None
self.values_received = 0
self._current_value = None
def load_test_data():
"""Load 2023-2024 BTC data for testing."""
storage = Storage(logging=logging)
# Load data for 2023-2024 period
start_date = "2023-01-01"
end_date = "2024-12-31"
data = storage.load_data("btcusd_1-min_data.csv", start_date, end_date)
if data.empty:
logging.error("No data loaded for testing period")
return None
logging.info(f"Loaded {len(data)} rows of data from {data.index[0]} to {data.index[-1]}")
return data
def test_bollinger_bands(data, period=20, std_multiplier=2.0):
"""Test Bollinger Bands: incremental vs batch implementation."""
logging.info(f"Testing Bollinger Bands (period={period}, std_multiplier={std_multiplier})")
# Original batch implementation - fix config structure
config = {
"bb_period": period,
"bb_width": 0.05, # Required for market regime detection
"trending": {
"bb_std_dev_multiplier": std_multiplier
},
"sideways": {
"bb_std_dev_multiplier": std_multiplier
}
}
bb_calculator = BollingerBands(config=config)
original_result = bb_calculator.calculate(data.copy())
# Incremental implementation
bb_state = BollingerBandsState(period=period, std_dev_multiplier=std_multiplier)
incremental_upper = []
incremental_middle = []
incremental_lower = []
incremental_bandwidth = []
for close_price in data['close']:
result = bb_state.update(close_price)
incremental_upper.append(result['upper_band'])
incremental_middle.append(result['middle_band'])
incremental_lower.append(result['lower_band'])
incremental_bandwidth.append(result['bandwidth'])
# Create incremental DataFrame
incremental_result = pd.DataFrame({
'UpperBand': incremental_upper,
'SMA': incremental_middle,
'LowerBand': incremental_lower,
'BBWidth': incremental_bandwidth
}, index=data.index)
# Compare results
comparison_results = {}
for col_orig, col_inc in [('UpperBand', 'UpperBand'), ('SMA', 'SMA'),
('LowerBand', 'LowerBand'), ('BBWidth', 'BBWidth')]:
if col_orig in original_result.columns:
# Skip NaN values for comparison (warm-up period)
valid_mask = ~(original_result[col_orig].isna() | incremental_result[col_inc].isna())
if valid_mask.sum() > 0:
orig_values = original_result[col_orig][valid_mask]
inc_values = incremental_result[col_inc][valid_mask]
max_diff = np.abs(orig_values - inc_values).max()
mean_diff = np.abs(orig_values - inc_values).mean()
comparison_results[col_orig] = {
'max_diff': max_diff,
'mean_diff': mean_diff,
'identical': max_diff < 1e-10
}
logging.info(f"BB {col_orig}: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={max_diff < 1e-10}")
return comparison_results, original_result, incremental_result
def test_rsi(data, period=14):
"""Test RSI: incremental vs batch implementation."""
logging.info(f"Testing RSI (period={period})")
# Original batch implementation
config = {"rsi_period": period}
rsi_calculator = RSI(config=config)
original_result = rsi_calculator.calculate(data.copy(), price_column='close')
# Test both standard EMA and Wilder's smoothing
rsi_state_standard = RSIState(period=period)
rsi_state_wilders = WildersRSIState(period=period)
incremental_rsi_standard = []
incremental_rsi_wilders = []
for close_price in data['close']:
rsi_value_standard = rsi_state_standard.update(close_price)
rsi_value_wilders = rsi_state_wilders.update(close_price)
incremental_rsi_standard.append(rsi_value_standard)
incremental_rsi_wilders.append(rsi_value_wilders)
# Create incremental DataFrames
incremental_result_standard = pd.DataFrame({
'RSI': incremental_rsi_standard
}, index=data.index)
incremental_result_wilders = pd.DataFrame({
'RSI': incremental_rsi_wilders
}, index=data.index)
# Compare results
comparison_results = {}
if 'RSI' in original_result.columns:
# Test standard EMA
valid_mask = ~(original_result['RSI'].isna() | incremental_result_standard['RSI'].isna())
if valid_mask.sum() > 0:
orig_values = original_result['RSI'][valid_mask]
inc_values = incremental_result_standard['RSI'][valid_mask]
max_diff = np.abs(orig_values - inc_values).max()
mean_diff = np.abs(orig_values - inc_values).mean()
comparison_results['RSI_Standard'] = {
'max_diff': max_diff,
'mean_diff': mean_diff,
'identical': max_diff < 1e-10
}
logging.info(f"RSI Standard EMA: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={max_diff < 1e-10}")
# Test Wilder's smoothing
valid_mask = ~(original_result['RSI'].isna() | incremental_result_wilders['RSI'].isna())
if valid_mask.sum() > 0:
orig_values = original_result['RSI'][valid_mask]
inc_values = incremental_result_wilders['RSI'][valid_mask]
max_diff = np.abs(orig_values - inc_values).max()
mean_diff = np.abs(orig_values - inc_values).mean()
comparison_results['RSI_Wilders'] = {
'max_diff': max_diff,
'mean_diff': mean_diff,
'identical': max_diff < 1e-10
}
logging.info(f"RSI Wilder's EMA: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={max_diff < 1e-10}")
return comparison_results, original_result, incremental_result_wilders
def plot_comparison(original, incremental, indicator_name, save_path=None):
"""Plot comparison between original and incremental implementations."""
fig, axes = plt.subplots(2, 1, figsize=(15, 10))
# Plot first 1000 points for visibility
plot_data = min(1000, len(original))
x_range = range(plot_data)
if indicator_name == "Bollinger Bands":
# Plot Bollinger Bands
axes[0].plot(x_range, original['UpperBand'].iloc[:plot_data], 'b-', label='Original Upper', alpha=0.7)
axes[0].plot(x_range, original['SMA'].iloc[:plot_data], 'g-', label='Original SMA', alpha=0.7)
axes[0].plot(x_range, original['LowerBand'].iloc[:plot_data], 'r-', label='Original Lower', alpha=0.7)
axes[0].plot(x_range, incremental['UpperBand'].iloc[:plot_data], 'b--', label='Incremental Upper', alpha=0.7)
axes[0].plot(x_range, incremental['SMA'].iloc[:plot_data], 'g--', label='Incremental SMA', alpha=0.7)
axes[0].plot(x_range, incremental['LowerBand'].iloc[:plot_data], 'r--', label='Incremental Lower', alpha=0.7)
# Plot differences
axes[1].plot(x_range, (original['UpperBand'] - incremental['UpperBand']).iloc[:plot_data], 'b-', label='Upper Diff')
axes[1].plot(x_range, (original['SMA'] - incremental['SMA']).iloc[:plot_data], 'g-', label='SMA Diff')
axes[1].plot(x_range, (original['LowerBand'] - incremental['LowerBand']).iloc[:plot_data], 'r-', label='Lower Diff')
elif indicator_name == "RSI":
# Plot RSI
axes[0].plot(x_range, original['RSI'].iloc[:plot_data], 'b-', label='Original RSI', alpha=0.7)
axes[0].plot(x_range, incremental['RSI'].iloc[:plot_data], 'r--', label='Incremental RSI', alpha=0.7)
# Plot differences
axes[1].plot(x_range, (original['RSI'] - incremental['RSI']).iloc[:plot_data], 'g-', label='RSI Diff')
axes[0].set_title(f'{indicator_name} Comparison: Original vs Incremental')
axes[0].legend()
axes[0].grid(True)
axes[1].set_title(f'{indicator_name} Differences')
axes[1].legend()
axes[1].grid(True)
axes[1].set_xlabel('Time Index')
plt.tight_layout()
if save_path:
plt.savefig(save_path, dpi=300, bbox_inches='tight')
logging.info(f"Plot saved to {save_path}")
plt.show()
def main():
"""Main test function."""
logging.info("Starting incremental indicators validation test")
# Load test data
data = load_test_data()
if data is None:
return
# Test with subset for faster execution during development
test_data = data.iloc[:10000] # First 10k rows for testing
logging.info(f"Using {len(test_data)} rows for testing")
# Test Bollinger Bands
logging.info("=" * 50)
bb_comparison, bb_original, bb_incremental = test_bollinger_bands(test_data)
# Test RSI
logging.info("=" * 50)
rsi_comparison, rsi_original, rsi_incremental = test_rsi(test_data)
# Summary
logging.info("=" * 50)
logging.info("VALIDATION SUMMARY:")
all_identical = True
for indicator, results in bb_comparison.items():
status = "PASS" if results['identical'] else "FAIL"
logging.info(f"Bollinger Bands {indicator}: {status}")
if not results['identical']:
all_identical = False
for indicator, results in rsi_comparison.items():
status = "PASS" if results['identical'] else "FAIL"
logging.info(f"RSI {indicator}: {status}")
if not results['identical']:
all_identical = False
if all_identical:
logging.info("ALL TESTS PASSED - Incremental indicators are identical to original implementations!")
else:
logging.warning("Some tests failed - Check differences above")
# Generate comparison plots
plot_comparison(bb_original, bb_incremental, "Bollinger Bands", "bb_comparison.png")
plot_comparison(rsi_original, rsi_incremental, "RSI", "rsi_comparison.png")
if __name__ == "__main__":
main()

81
test/test_pandas_ema.py Normal file
View File

@@ -0,0 +1,81 @@
"""
Test pandas EMA behavior to understand Wilder's smoothing initialization
"""
import pandas as pd
import numpy as np
def test_pandas_ema():
"""Test how pandas EMA works with Wilder's smoothing."""
# Sample data from our debug
prices = [16568.00, 16569.00, 16569.00, 16568.00, 16565.00, 16565.00,
16565.00, 16565.00, 16565.00, 16565.00, 16566.00, 16566.00,
16563.00, 16566.00, 16566.00, 16566.00, 16566.00, 16566.00]
# Calculate deltas
deltas = np.diff(prices)
gains = np.where(deltas > 0, deltas, 0)
losses = np.where(deltas < 0, -deltas, 0)
print("Price changes:")
for i, (delta, gain, loss) in enumerate(zip(deltas, gains, losses)):
print(f"Step {i+1}: delta={delta:5.2f}, gain={gain:4.2f}, loss={loss:4.2f}")
# Create series
gain_series = pd.Series(gains)
loss_series = pd.Series(losses)
period = 14
alpha = 1.0 / period
print(f"\nUsing period={period}, alpha={alpha:.6f}")
# Test different EMA parameters
print("\n1. Standard EMA with min_periods=period:")
avg_gain_1 = gain_series.ewm(alpha=alpha, adjust=False, min_periods=period).mean()
avg_loss_1 = loss_series.ewm(alpha=alpha, adjust=False, min_periods=period).mean()
print("Index | Gain | Loss | AvgGain | AvgLoss | RS | RSI")
print("-" * 60)
for i in range(min(len(avg_gain_1), 18)):
gain = gains[i] if i < len(gains) else 0
loss = losses[i] if i < len(losses) else 0
avg_g = avg_gain_1.iloc[i]
avg_l = avg_loss_1.iloc[i]
if not (pd.isna(avg_g) or pd.isna(avg_l)) and avg_l != 0:
rs = avg_g / avg_l
rsi = 100 - (100 / (1 + rs))
else:
rs = np.nan
rsi = np.nan
print(f"{i:5d} | {gain:4.2f} | {loss:4.2f} | {avg_g:7.4f} | {avg_l:7.4f} | {rs:4.2f} | {rsi:6.2f}")
print("\n2. EMA with min_periods=1:")
avg_gain_2 = gain_series.ewm(alpha=alpha, adjust=False, min_periods=1).mean()
avg_loss_2 = loss_series.ewm(alpha=alpha, adjust=False, min_periods=1).mean()
print("Index | Gain | Loss | AvgGain | AvgLoss | RS | RSI")
print("-" * 60)
for i in range(min(len(avg_gain_2), 18)):
gain = gains[i] if i < len(gains) else 0
loss = losses[i] if i < len(losses) else 0
avg_g = avg_gain_2.iloc[i]
avg_l = avg_loss_2.iloc[i]
if not (pd.isna(avg_g) or pd.isna(avg_l)) and avg_l != 0:
rs = avg_g / avg_l
rsi = 100 - (100 / (1 + rs))
elif avg_l == 0 and avg_g > 0:
rs = np.inf
rsi = 100.0
else:
rs = np.nan
rsi = np.nan
print(f"{i:5d} | {gain:4.2f} | {loss:4.2f} | {avg_g:7.4f} | {avg_l:7.4f} | {rs:4.2f} | {rsi:6.2f}")
if __name__ == "__main__":
test_pandas_ema()

396
test/test_realtime_bbrs.py Normal file
View File

@@ -0,0 +1,396 @@
"""
Test Real-time BBRS Strategy with Minute-level Data
This script validates that the incremental BBRS strategy can:
1. Accept minute-level data input (real-time simulation)
2. Internally aggregate to configured timeframes (15min, 1h, etc.)
3. Generate signals only when timeframe bars complete
4. Produce identical results to pre-aggregated data processing
"""
import pandas as pd
import numpy as np
import logging
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
# Import incremental implementation
from cycles.IncStrategies.bbrs_incremental import BBRSIncrementalState
# Import storage utility
from cycles.utils.storage import Storage
# Setup logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler("test_realtime_bbrs.log"),
logging.StreamHandler()
]
)
def load_minute_data():
"""Load minute-level BTC data for real-time simulation."""
storage = Storage(logging=logging)
# Load data for testing period
start_date = "2023-01-01"
end_date = "2023-01-03" # 2 days for testing
data = storage.load_data("btcusd_1-min_data.csv", start_date, end_date)
if data.empty:
logging.error("No data loaded for testing period")
return None
logging.info(f"Loaded {len(data)} minute-level data points from {data.index[0]} to {data.index[-1]}")
return data
def test_timeframe_aggregation():
"""Test different timeframe aggregations with minute-level data."""
# Load minute data
minute_data = load_minute_data()
if minute_data is None:
return
# Test different timeframes
timeframes = [15, 60] # 15min and 1h
for timeframe_minutes in timeframes:
logging.info(f"\n{'='*60}")
logging.info(f"Testing {timeframe_minutes}-minute timeframe")
logging.info(f"{'='*60}")
# Configuration for this timeframe
config = {
"timeframe_minutes": timeframe_minutes,
"bb_period": 20,
"rsi_period": 14,
"bb_width": 0.05,
"trending": {
"rsi_threshold": [30, 70],
"bb_std_dev_multiplier": 2.5,
},
"sideways": {
"rsi_threshold": [40, 60],
"bb_std_dev_multiplier": 1.8,
},
"SqueezeStrategy": True
}
# Initialize strategy
strategy = BBRSIncrementalState(config)
# Simulate real-time minute-by-minute processing
results = []
minute_count = 0
bar_count = 0
logging.info(f"Processing {len(minute_data)} minute-level data points...")
for timestamp, row in minute_data.iterrows():
minute_count += 1
# Prepare minute-level OHLCV data
minute_ohlcv = {
'open': row['open'],
'high': row['high'],
'low': row['low'],
'close': row['close'],
'volume': row['volume']
}
# Update strategy with minute data
result = strategy.update_minute_data(timestamp, minute_ohlcv)
if result is not None:
# A timeframe bar completed
bar_count += 1
results.append(result)
# Log significant events
if result['buy_signal']:
logging.info(f"🟢 BUY SIGNAL at {result['timestamp']} (Bar #{bar_count})")
logging.info(f" Price: {result['close']:.2f}, RSI: {result['rsi']:.2f}, Regime: {result['market_regime']}")
if result['sell_signal']:
logging.info(f"🔴 SELL SIGNAL at {result['timestamp']} (Bar #{bar_count})")
logging.info(f" Price: {result['close']:.2f}, RSI: {result['rsi']:.2f}, Regime: {result['market_regime']}")
# Log every 10th bar for monitoring
if bar_count % 10 == 0:
logging.info(f"Processed {minute_count} minutes → {bar_count} {timeframe_minutes}min bars")
logging.info(f" Current: Price={result['close']:.2f}, RSI={result['rsi']:.2f}, Regime={result['market_regime']}")
# Show current incomplete bar
incomplete_bar = strategy.get_current_incomplete_bar()
if incomplete_bar:
logging.info(f" Incomplete bar: Volume={incomplete_bar['volume']:.0f}")
# Final statistics
logging.info(f"\n📊 {timeframe_minutes}-minute Timeframe Results:")
logging.info(f" Minutes processed: {minute_count}")
logging.info(f" Bars generated: {bar_count}")
logging.info(f" Expected bars: ~{minute_count // timeframe_minutes}")
logging.info(f" Strategy warmed up: {strategy.is_warmed_up()}")
if results:
results_df = pd.DataFrame(results)
buy_signals = results_df['buy_signal'].sum()
sell_signals = results_df['sell_signal'].sum()
logging.info(f" Buy signals: {buy_signals}")
logging.info(f" Sell signals: {sell_signals}")
# Show regime distribution
regime_counts = results_df['market_regime'].value_counts()
logging.info(f" Market regimes: {dict(regime_counts)}")
# Plot results for this timeframe
plot_timeframe_results(results_df, timeframe_minutes)
def test_consistency_with_pre_aggregated():
"""Test that minute-level processing produces same results as pre-aggregated data."""
logging.info(f"\n{'='*60}")
logging.info("Testing consistency: Minute-level vs Pre-aggregated")
logging.info(f"{'='*60}")
# Load minute data
minute_data = load_minute_data()
if minute_data is None:
return
# Use smaller dataset for detailed comparison
test_data = minute_data.iloc[:1440].copy() # 24 hours of minute data
timeframe_minutes = 60 # 1 hour
config = {
"timeframe_minutes": timeframe_minutes,
"bb_period": 20,
"rsi_period": 14,
"bb_width": 0.05,
"trending": {
"rsi_threshold": [30, 70],
"bb_std_dev_multiplier": 2.5,
},
"sideways": {
"rsi_threshold": [40, 60],
"bb_std_dev_multiplier": 1.8,
},
"SqueezeStrategy": True
}
# Method 1: Process minute-by-minute (real-time simulation)
logging.info("Method 1: Processing minute-by-minute...")
strategy_realtime = BBRSIncrementalState(config)
realtime_results = []
for timestamp, row in test_data.iterrows():
minute_ohlcv = {
'open': row['open'],
'high': row['high'],
'low': row['low'],
'close': row['close'],
'volume': row['volume']
}
result = strategy_realtime.update_minute_data(timestamp, minute_ohlcv)
if result is not None:
realtime_results.append(result)
# Method 2: Pre-aggregate and process (traditional method)
logging.info("Method 2: Processing pre-aggregated data...")
from cycles.utils.data_utils import aggregate_to_hourly
hourly_data = aggregate_to_hourly(test_data, 1)
strategy_batch = BBRSIncrementalState(config)
batch_results = []
for timestamp, row in hourly_data.iterrows():
hourly_ohlcv = {
'open': row['open'],
'high': row['high'],
'low': row['low'],
'close': row['close'],
'volume': row['volume']
}
result = strategy_batch.update(hourly_ohlcv)
batch_results.append(result)
# Compare results
logging.info("Comparing results...")
realtime_df = pd.DataFrame(realtime_results)
batch_df = pd.DataFrame(batch_results)
logging.info(f"Real-time bars: {len(realtime_df)}")
logging.info(f"Batch bars: {len(batch_df)}")
if len(realtime_df) > 0 and len(batch_df) > 0:
# Compare after warm-up
warmup_bars = 25 # Conservative warm-up period
if len(realtime_df) > warmup_bars and len(batch_df) > warmup_bars:
rt_warmed = realtime_df.iloc[warmup_bars:]
batch_warmed = batch_df.iloc[warmup_bars:]
# Align by taking minimum length
min_len = min(len(rt_warmed), len(batch_warmed))
rt_aligned = rt_warmed.iloc[:min_len]
batch_aligned = batch_warmed.iloc[:min_len]
logging.info(f"Comparing {min_len} aligned bars after warm-up...")
# Compare key metrics
comparisons = [
('close', 'Close Price'),
('rsi', 'RSI'),
('upper_band', 'Upper Band'),
('lower_band', 'Lower Band'),
('middle_band', 'Middle Band'),
('buy_signal', 'Buy Signal'),
('sell_signal', 'Sell Signal')
]
for col, name in comparisons:
if col in rt_aligned.columns and col in batch_aligned.columns:
if col in ['buy_signal', 'sell_signal']:
# Boolean comparison
match_rate = (rt_aligned[col] == batch_aligned[col]).mean()
logging.info(f"{name}: {match_rate:.4f} match rate ({match_rate*100:.2f}%)")
else:
# Numerical comparison
diff = np.abs(rt_aligned[col] - batch_aligned[col])
max_diff = diff.max()
mean_diff = diff.mean()
logging.info(f"{name}: Max diff={max_diff:.6f}, Mean diff={mean_diff:.6f}")
# Plot comparison
plot_consistency_comparison(rt_aligned, batch_aligned)
def plot_timeframe_results(results_df, timeframe_minutes):
"""Plot results for a specific timeframe."""
if len(results_df) < 10:
logging.warning(f"Not enough data to plot for {timeframe_minutes}min timeframe")
return
fig, axes = plt.subplots(3, 1, figsize=(15, 10))
# Plot 1: Price and Bollinger Bands
axes[0].plot(results_df.index, results_df['close'], 'k-', label='Close Price', alpha=0.8)
axes[0].plot(results_df.index, results_df['upper_band'], 'b-', label='Upper Band', alpha=0.7)
axes[0].plot(results_df.index, results_df['middle_band'], 'g-', label='Middle Band', alpha=0.7)
axes[0].plot(results_df.index, results_df['lower_band'], 'r-', label='Lower Band', alpha=0.7)
# Mark signals
buy_signals = results_df[results_df['buy_signal']]
sell_signals = results_df[results_df['sell_signal']]
if len(buy_signals) > 0:
axes[0].scatter(buy_signals.index, buy_signals['close'],
color='green', marker='^', s=100, label='Buy Signal', zorder=5)
if len(sell_signals) > 0:
axes[0].scatter(sell_signals.index, sell_signals['close'],
color='red', marker='v', s=100, label='Sell Signal', zorder=5)
axes[0].set_title(f'{timeframe_minutes}-minute Timeframe: Price and Bollinger Bands')
axes[0].legend()
axes[0].grid(True)
# Plot 2: RSI
axes[1].plot(results_df.index, results_df['rsi'], 'purple', label='RSI', alpha=0.8)
axes[1].axhline(y=70, color='red', linestyle='--', alpha=0.5, label='Overbought')
axes[1].axhline(y=30, color='green', linestyle='--', alpha=0.5, label='Oversold')
axes[1].set_title('RSI')
axes[1].legend()
axes[1].grid(True)
axes[1].set_ylim(0, 100)
# Plot 3: Market Regime
regime_numeric = [1 if regime == 'sideways' else 0 for regime in results_df['market_regime']]
axes[2].plot(results_df.index, regime_numeric, 'orange', label='Market Regime', alpha=0.8)
axes[2].set_title('Market Regime (1=Sideways, 0=Trending)')
axes[2].legend()
axes[2].grid(True)
axes[2].set_ylim(-0.1, 1.1)
plt.tight_layout()
save_path = f"realtime_bbrs_{timeframe_minutes}min.png"
plt.savefig(save_path, dpi=300, bbox_inches='tight')
logging.info(f"Plot saved to {save_path}")
plt.show()
def plot_consistency_comparison(realtime_df, batch_df):
"""Plot comparison between real-time and batch processing."""
fig, axes = plt.subplots(2, 1, figsize=(15, 8))
# Plot 1: Price and signals comparison
axes[0].plot(realtime_df.index, realtime_df['close'], 'k-', label='Price', alpha=0.8)
# Real-time signals
rt_buy = realtime_df[realtime_df['buy_signal']]
rt_sell = realtime_df[realtime_df['sell_signal']]
if len(rt_buy) > 0:
axes[0].scatter(rt_buy.index, rt_buy['close'],
color='green', marker='^', s=80, label='Real-time Buy', alpha=0.8)
if len(rt_sell) > 0:
axes[0].scatter(rt_sell.index, rt_sell['close'],
color='red', marker='v', s=80, label='Real-time Sell', alpha=0.8)
# Batch signals
batch_buy = batch_df[batch_df['buy_signal']]
batch_sell = batch_df[batch_df['sell_signal']]
if len(batch_buy) > 0:
axes[0].scatter(batch_buy.index, batch_buy['close'],
color='lightgreen', marker='s', s=60, label='Batch Buy', alpha=0.6)
if len(batch_sell) > 0:
axes[0].scatter(batch_sell.index, batch_sell['close'],
color='lightcoral', marker='s', s=60, label='Batch Sell', alpha=0.6)
axes[0].set_title('Signal Comparison: Real-time vs Batch Processing')
axes[0].legend()
axes[0].grid(True)
# Plot 2: RSI comparison
axes[1].plot(realtime_df.index, realtime_df['rsi'], 'b-', label='Real-time RSI', alpha=0.8)
axes[1].plot(batch_df.index, batch_df['rsi'], 'r--', label='Batch RSI', alpha=0.8)
axes[1].set_title('RSI Comparison')
axes[1].legend()
axes[1].grid(True)
plt.tight_layout()
save_path = "realtime_vs_batch_comparison.png"
plt.savefig(save_path, dpi=300, bbox_inches='tight')
logging.info(f"Comparison plot saved to {save_path}")
plt.show()
def main():
"""Main test function."""
logging.info("Starting real-time BBRS strategy validation test")
try:
# Test 1: Different timeframe aggregations
test_timeframe_aggregation()
# Test 2: Consistency with pre-aggregated data
test_consistency_with_pre_aggregated()
logging.info("Real-time BBRS strategy test completed successfully!")
except Exception as e:
logging.error(f"Test failed with error: {e}")
raise
if __name__ == "__main__":
main()