139 lines
4.8 KiB
Python
139 lines
4.8 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Debug script to investigate timeframe alignment issues.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pandas as pd
|
||
|
|
import sys
|
||
|
|
import os
|
||
|
|
|
||
|
|
# Add the project root to Python path
|
||
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||
|
|
|
||
|
|
from IncrementalTrader.utils import aggregate_minute_data_to_timeframe, parse_timeframe_to_minutes
|
||
|
|
|
||
|
|
|
||
|
|
def create_test_data(num_minutes=60, start_time='2024-01-01 09:00:00'):
    """Build synthetic one-minute OHLCV rows for alignment debugging.

    Each row is a dict with the keys ``timestamp``, ``open``, ``high``,
    ``low``, ``close`` and ``volume``. Prices rise by 0.1 per minute and
    volume by 10 per minute, so every row is distinguishable when
    inspecting aggregated output.

    Args:
        num_minutes: Number of consecutive one-minute rows to generate.
            The default of 60 yields four complete 15-minute windows.
        start_time: Timestamp of the first row; anything accepted by
            ``pd.Timestamp`` (string or Timestamp).

    Returns:
        A list of ``num_minutes`` dicts ordered by ascending timestamp
        (empty when ``num_minutes`` is 0 or negative).
    """
    first_minute = pd.Timestamp(start_time)

    return [
        {
            'timestamp': first_minute + pd.Timedelta(minutes=offset),
            'open': 100.0 + offset * 0.1,
            'high': 100.5 + offset * 0.1,
            'low': 99.5 + offset * 0.1,
            'close': 100.2 + offset * 0.1,
            'volume': 1000 + offset * 10,
        }
        for offset in range(num_minutes)
    ]
|
||
|
|
|
||
|
|
|
||
|
|
def debug_aggregation():
    """Print aggregated bars for several timeframes and check 5min/15min nesting.

    Generates the synthetic minute data, aggregates it to 5min, 15min,
    30min and 1h via ``aggregate_minute_data_to_timeframe`` (always passing
    "end" as the third argument) and prints every resulting bar. It then
    re-aggregates to 5min and 15min and, for each 15min bar, counts the
    5min bars whose timestamp lies in the half-open window
    ``(timestamp - 15 minutes, timestamp]``, reporting an alignment issue
    unless exactly three are found.

    NOTE(review): the window check treats each bar timestamp as the END of
    its period, which presumably is what the "end" argument selects --
    confirm against IncrementalTrader.utils.
    """
    print("🔍 Debugging Timeframe Alignment")
    print("=" * 50)

    # Build the synthetic minute series used by every step below.
    minute_data = create_test_data()
    first_row, last_row = minute_data[0], minute_data[-1]
    print(f"📊 Created {len(minute_data)} minute data points")
    print(f"📅 Range: {first_row['timestamp']} to {last_row['timestamp']}")

    # Dump every bar produced for each timeframe of interest.
    for timeframe in ("5min", "15min", "30min", "1h"):
        print(f"\n🔄 Aggregating to {timeframe}...")
        aggregated = aggregate_minute_data_to_timeframe(minute_data, timeframe, "end")
        print(f" ✅ Generated {len(aggregated)} bars")

        for number, candle in enumerate(aggregated, start=1):
            print(
                f" Bar {number}: {candle['timestamp']} | "
                f"O={candle['open']:.1f} H={candle['high']:.1f} "
                f"L={candle['low']:.1f} C={candle['close']:.1f}"
            )

    print("\n🎯 Checking Alignment:")

    # Both series are aggregated before either is printed.
    bars_5m = aggregate_minute_data_to_timeframe(minute_data, "5min", "end")
    bars_15m = aggregate_minute_data_to_timeframe(minute_data, "15min", "end")

    for heading, series in (("5-minute", bars_5m), ("15-minute", bars_15m)):
        print(f"\n{heading} bars ({len(series)}):")
        for number, candle in enumerate(series, start=1):
            print(
                f" {number:2d}. {candle['timestamp']} | "
                f"O={candle['open']:.1f} C={candle['close']:.1f}"
            )

    print("\n🔍 Alignment Check:")
    window = pd.Timedelta(minutes=15)
    for number, coarse_bar in enumerate(bars_15m, start=1):
        window_end = coarse_bar['timestamp']
        print(f"\n15min bar {number}: {window_end}")

        # 5min bars belonging to this 15min bar: start exclusive, end inclusive.
        window_start = window_end - window
        nested = [
            fine_bar
            for fine_bar in bars_5m
            if window_start < fine_bar['timestamp'] <= window_end
        ]

        print(f" Should contain 3 x 5min bars from {window_start} to {window_end}")
        print(f" Found {len(nested)} x 5min bars:")
        for position, fine_bar in enumerate(nested, start=1):
            print(f" {position}. {fine_bar['timestamp']}")

        if len(nested) == 3:
            print(" ✅ Alignment OK")
        else:
            print(f" ❌ ALIGNMENT ISSUE: Expected 3 bars, found {len(nested)}")
|
||
|
|
|
||
|
|
|
||
|
|
def test_pandas_resampling():
    """Print plain pandas resampling results for comparison.

    Resamples the synthetic minute data to 5min and 15min bars with
    ``DataFrame.resample`` using both ``label='right'`` and
    ``label='left'``, and prints the open/close of every resulting bar so
    the labels can be compared by eye with the project's own aggregation
    output.

    The input comes from ``create_test_data()`` instead of a second
    hand-written copy of the same series, so both diagnostics always run
    on identical data.
    """
    print("\n📊 Testing Pandas Resampling Directly")
    print("=" * 40)

    # Same 60-row fixture as debug_aggregation(), indexed by timestamp so
    # that resample() can bucket on it.
    df = pd.DataFrame(create_test_data()).set_index('timestamp')

    print(f"Original data range: {df.index[0]} to {df.index[-1]}")

    # Standard OHLCV reduction; identical for every timeframe/label combination.
    ohlcv_rules = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }

    for label_mode in ('right', 'left'):
        print(f"\n🏷️ Testing label='{label_mode}':")

        for tf in ('5min', '15min'):
            # dropna() discards buckets that received no rows.
            resampled = df.resample(tf, label=label_mode).agg(ohlcv_rules).dropna()

            print(f" {tf} ({len(resampled)} bars):")
            for number, (ts, row) in enumerate(resampled.iterrows(), start=1):
                print(f" {number}. {ts} | O={row['open']:.1f} C={row['close']:.1f}")
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Run the project-aggregation diagnostic first, then the plain pandas
    # comparison, so their printed output can be read top to bottom.
    for run_step in (debug_aggregation, test_pandas_resampling):
        run_step()
|