Cycles/test/real_data_alignment_test.py

343 lines
13 KiB
Python
Raw Normal View History

2025-05-28 18:26:51 +08:00
#!/usr/bin/env python3
"""
Real data alignment test with BTC data limited to 4 hours for clear visualization.
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.patches import Rectangle
import sys
import os
# Add the directory containing this script to the Python path
# (NOTE: this is the project root only if the script itself lives there)
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from IncrementalTrader.utils import aggregate_minute_data_to_timeframe, parse_timeframe_to_minutes
def _build_ohlcv_column_mapping(columns) -> dict:
    """Map raw CSV column names onto the canonical OHLCV names.

    Column names are compared case-insensitively against 'open', 'high',
    'low', 'close' and 'volume' (checked in that order for each column).
    Every canonical name is assigned to at most one source column: an exact
    name wins, otherwise the first column containing the name is used.
    Assigning a canonical name only once prevents duplicate column labels
    after renaming (e.g. 'Volume_(BTC)' and 'Volume_(Currency)' both
    becoming 'volume').

    Args:
        columns: Iterable of column labels from the loaded DataFrame

    Returns:
        Dict of {original column label: canonical name} suitable for
        DataFrame.rename(columns=...)
    """
    targets = ('open', 'high', 'low', 'close', 'volume')
    mapping = {}
    claimed = set()

    # Pass 1: exact (case-insensitive) names always take priority.
    for col in columns:
        lowered = str(col).lower()
        if lowered in targets and lowered not in claimed:
            mapping[col] = lowered
            claimed.add(lowered)

    # Pass 2: substring matches fill the canonical names still unassigned.
    for col in columns:
        if col in mapping:
            continue
        lowered = str(col).lower()
        target = next((name for name in targets if name in lowered), None)
        if target is not None and target not in claimed:
            mapping[col] = target
            claimed.add(target)

    return mapping


def load_btc_data_4hours(file_path: str) -> list:
    """
    Load 4 hours of BTC minute data from CSV file.

    The most recent date with at least 1000 rows is selected. Within that
    date, every window of 4 consecutive clock hours holding at least 200
    rows is scored by (max high - min low) / mean close, and the
    highest-scoring window is returned (capped at 240 rows). If no window
    qualifies, the first 240 rows of the date are used instead.

    Args:
        file_path: Path to the CSV file

    Returns:
        List of minute OHLCV data dictionaries with the keys 'timestamp',
        'open', 'high', 'low', 'close' and 'volume'. An empty list is
        returned when the file cannot be read, required columns are missing,
        or no date has enough data.
    """
    print(f"📊 Loading 4 hours of BTC data from {file_path}")
    try:
        df = pd.read_csv(file_path)
        print(f" 📈 Loaded {len(df)} total rows")

        # Handle Unix timestamp format (seconds since the epoch)
        if 'Timestamp' in df.columns:
            print(" 🕐 Converting Unix timestamps...")
            df['timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

        # Standardize column names without ever producing duplicate labels
        df = df.rename(columns=_build_ohlcv_column_mapping(df.columns))

        # Fail with a clear message instead of an opaque KeyError later on
        required = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
        missing = [name for name in required if name not in df.columns]
        if missing:
            print(f" ❌ Missing required columns: {missing}")
            return []

        # Remove rows with zero or invalid prices (NaN comparisons are False)
        initial_len = len(df)
        df = df[(df['open'] > 0) & (df['high'] > 0) & (df['low'] > 0) & (df['close'] > 0)]
        if len(df) < initial_len:
            print(f" 🧹 Removed {initial_len - len(df)} rows with invalid prices")

        df = df.sort_values('timestamp')

        # Find a good 4-hour period with active trading
        print(" 📅 Finding a good 4-hour period...")
        df['date'] = df['timestamp'].dt.date
        date_counts = df.groupby('date').size()
        good_dates = date_counts[date_counts >= 1000].index  # Dates with lots of data
        if len(good_dates) == 0:
            print(" ❌ No dates with sufficient data found")
            return []

        # groupby sorts its keys, so the last entry is the most recent date
        selected_date = good_dates[-1]
        df_date = df[df['date'] == selected_date].copy()
        print(f" ✅ Selected date: {selected_date} with {len(df_date)} data points")

        # Score every 4-hour window that fits inside the day (start hours 0-20)
        df_date['hour'] = df_date['timestamp'].dt.hour
        best_start_hour = None
        best_volatility = 0
        for start_hour in range(0, 21):
            in_window = (df_date['hour'] >= start_hour) & (df_date['hour'] < start_hour + 4)
            window_data = df_date[in_window]
            if len(window_data) < 200:  # Need at least 200 minutes of data
                continue
            # Volatility is the window's price range relative to its mean close
            price_range = window_data['high'].max() - window_data['low'].min()
            avg_price = window_data['close'].mean()
            volatility = price_range / avg_price if avg_price > 0 else 0
            if volatility > best_volatility:
                best_volatility = volatility
                best_start_hour = start_hour

        if best_start_hour is None:
            # Fallback: just take first 4 hours of data
            df_4h = df_date.head(240)  # 4 hours = 240 minutes
            print(" 📊 Using first 4 hours as fallback")
        else:
            end_hour = best_start_hour + 4
            df_4h = df_date[
                (df_date['hour'] >= best_start_hour) &
                (df_date['hour'] < end_hour)
            ].head(240)  # Limit to 240 minutes max
            print(f" 📊 Selected 4-hour window: {best_start_hour:02d}:00 - {end_hour:02d}:00")
            print(f" 📈 Price volatility: {best_volatility:.4f}")

        print(f" ✅ Final dataset: {len(df_4h)} rows from {df_4h['timestamp'].min()} to {df_4h['timestamp'].max()}")

        # Convert to list of dictionaries with plain Python floats
        return [
            {
                'timestamp': ts,
                'open': float(open_),
                'high': float(high),
                'low': float(low),
                'close': float(close),
                'volume': float(volume),
            }
            for ts, open_, high, low, close, volume
            in df_4h[required].itertuples(index=False, name=None)
        ]
    except Exception as e:
        # Top-level boundary of the loader: report the failure and return an
        # empty result so callers can treat "no data" uniformly.
        print(f" ❌ Error loading data: {e}")
        import traceback
        traceback.print_exc()
        return []
def plot_timeframe_bars(ax, data, timeframe, color, alpha=0.7, show_labels=True):
    """Draw aggregated OHLC bars on *ax*, each spanning its whole time period.

    Every bar's 'timestamp' is treated as the END of its period; the bar
    therefore covers [timestamp - timeframe, timestamp]. The open/close body
    is drawn as a rectangle over that span and the high/low range as a
    vertical line through the middle of the span.

    Args:
        ax: Matplotlib axes to draw on
        data: Sequence of bar dicts with 'timestamp', 'open', 'high', 'low'
            and 'close' keys; nothing is drawn when it is empty
        timeframe: Timeframe string handed to parse_timeframe_to_minutes
        color: Base face colour; the special value 'green' switches to a
            light green / light coral fill depending on bar direction
        alpha: Opacity applied to both body and wick
        show_labels: When true, "5min" and "15min" bars get a numbered label
    """
    if not data:
        return

    span_minutes = parse_timeframe_to_minutes(timeframe)

    for index, candle in enumerate(data):
        closes_at = candle['timestamp']
        o = candle['open']
        h = candle['high']
        lo = candle['low']
        c = candle['close']

        # End-timestamp mode: the period finishes at the bar's timestamp
        left_edge = closes_at - pd.Timedelta(minutes=span_minutes)
        right_edge = closes_at

        # Body geometry: from the lower of open/close up to the higher one
        height = abs(c - o)
        bottom = min(o, c)

        # Direction decides the outline; the fill is only tinted when the
        # caller asked for the special 'green' base colour
        bullish = c >= o
        outline = 'darkgreen' if bullish else 'darkred'
        tint = 'lightgreen' if bullish else 'lightcoral'
        fill = tint if color == 'green' else color

        body = Rectangle((left_edge, bottom),
                         right_edge - left_edge, height,
                         facecolor=fill, edgecolor=outline,
                         alpha=alpha, linewidth=1)
        ax.add_patch(body)

        # High-low wick through the middle of the period
        middle = left_edge + (right_edge - left_edge) / 2
        ax.plot([middle, middle], [lo, h],
                color=outline, linewidth=2, alpha=alpha)

        # Only the smaller timeframes are labelled, to keep the chart legible
        if show_labels and timeframe in ["5min", "15min"]:
            ax.text(middle, h + (h * 0.001), f"{timeframe}\n#{index+1}",
                    ha='center', va='bottom', fontsize=7, fontweight='bold')
def create_real_data_alignment_visualization(minute_data):
    """Plot minute data aggregated to several timeframes on one chart.

    The minute bars are aggregated to 5min, 15min, 30min and 1h bars (end
    timestamp mode), drawn on a single axes from the largest timeframe to the
    smallest, and a textual check is printed showing which 5min bars fall
    inside each 15min bar.

    Args:
        minute_data: List of minute bar dicts with 'timestamp' and 'close'
            keys (plus whatever the aggregation helper needs)

    Returns:
        The created matplotlib figure, or None when minute_data is empty
    """
    print("🎯 Creating Real Data Timeframe Alignment Visualization")
    print("=" * 60)
    if not minute_data:
        print("❌ No data to visualize")
        return None

    print(f"📊 Using {len(minute_data)} minute data points")
    print(f"📅 Range: {minute_data[0]['timestamp']} to {minute_data[-1]['timestamp']}")

    # Show price range
    closes = [point['close'] for point in minute_data]
    print(f"💰 Price range: ${min(closes):.2f} - ${max(closes):.2f}")

    # Per-timeframe drawing style: (colour, opacity), smallest timeframe first
    styles = {
        "5min": ('red', 0.8),
        "15min": ('green', 0.6),
        "30min": ('blue', 0.4),
        "1h": ('purple', 0.2),
    }
    timeframes = list(styles)

    # Aggregate to different timeframes
    bars_by_tf = {}
    for tf in timeframes:
        bars_by_tf[tf] = aggregate_minute_data_to_timeframe(minute_data, tf, "end")
        print(f" {tf}: {len(bars_by_tf[tf])} bars")

    # Create visualization
    fig, ax = plt.subplots(1, 1, figsize=(18, 10))
    fig.suptitle('Real BTC Data - Timeframe Alignment Visualization\n(4 hours of real market data)',
                 fontsize=16, fontweight='bold')

    # Largest timeframe first so the smaller ones end up in the foreground
    for tf in timeframes[::-1]:
        tf_color, tf_alpha = styles[tf]
        label_bars = tf in ["5min", "15min"]  # Only label smaller timeframes for clarity
        plot_timeframe_bars(ax, bars_by_tf[tf], tf, tf_color, tf_alpha, label_bars)

    # Format the plot
    ax.set_ylabel('Price (USD)', fontsize=12)
    ax.set_xlabel('Time', fontsize=12)
    ax.grid(True, alpha=0.3)

    # Format x-axis: hourly major ticks, half-hourly minor ticks
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))
    ax.xaxis.set_minor_locator(mdates.MinuteLocator(interval=30))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

    # Legend: one swatch per timeframe, in ascending timeframe order
    legend_handles = [
        plt.Rectangle((0, 0), 1, 1,
                      facecolor=styles[tf][0],
                      alpha=styles[tf][1],
                      label=f"{tf} ({len(bars_by_tf[tf])} bars)")
        for tf in timeframes
    ]
    ax.legend(handles=legend_handles, loc='upper left', fontsize=10)

    # Add explanation
    explanation = "\n".join([
        "Real BTC market data showing timeframe alignment.",
        "Green bars = bullish (close > open), Red bars = bearish (close < open).",
        "Each bar spans its full time period - smaller timeframes fit inside larger ones.",
    ])
    ax.text(0.02, 0.98, explanation, transform=ax.transAxes,
            verticalalignment='top', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.9))
    plt.tight_layout()

    # Print alignment verification
    print(f"\n🔍 Alignment Verification:")
    five_min_bars = bars_by_tf["5min"]
    for number, parent in enumerate(bars_by_tf["15min"], start=1):
        print(f"\n15min bar {number}: {parent['timestamp']} | ${parent['open']:.2f} -> ${parent['close']:.2f}")
        parent_start = parent['timestamp'] - pd.Timedelta(minutes=15)

        # A 5min bar belongs to the 15min bar when its whole period
        # [timestamp - 5min, timestamp] lies inside the parent's period
        children = [
            child for child in five_min_bars
            if parent_start <= child['timestamp'] - pd.Timedelta(minutes=5)
            and child['timestamp'] <= parent['timestamp']
        ]

        print(f" Contains {len(children)} x 5min bars:")
        for position, child in enumerate(children, start=1):
            print(f" {position}. {child['timestamp']} | ${child['open']:.2f} -> ${child['close']:.2f}")

        if len(children) == 3:
            print(f" ✅ Alignment OK")
        else:
            print(f" ❌ ALIGNMENT ISSUE: Expected 3 bars, found {len(children)}")

    return fig
def main():
    """Run the alignment test end to end.

    Loads a 4-hour slice of BTC minute data from ./data, builds the alignment
    chart, shows it, and prints what to look for in the result.

    Returns:
        True when the test ran to completion, False when the data file is
        missing, no data could be loaded, or an exception was raised.
    """
    print("🚀 Real Data Timeframe Alignment Test")
    print("=" * 45)

    # Configuration
    data_file = "./data/btcusd_1-min_data.csv"

    # Bail out early when the input file is not there
    if not os.path.exists(data_file):
        print(f"❌ Data file not found: {data_file}")
        print("Please ensure the BTC data file exists in the ./data/ directory")
        return False

    try:
        # Load 4 hours of real data
        bars = load_btc_data_4hours(data_file)
        if not bars:
            print("❌ Failed to load data")
            return False

        # Create visualization and display it when a figure was produced
        figure = create_real_data_alignment_visualization(bars)
        if figure:
            plt.show()

        summary = (
            "\n✅ Real data alignment test completed!",
            "📊 In the chart, you should see:",
            " - Real BTC price movements over 4 hours",
            " - Each 15min bar contains exactly 3 x 5min bars",
            " - Each 30min bar contains exactly 6 x 5min bars",
            " - Each 1h bar contains exactly 12 x 5min bars",
            " - All bars are properly aligned with no gaps or overlaps",
            " - Green bars = bullish periods, Red bars = bearish periods",
        )
        for message in summary:
            print(message)
        return True
    except Exception as e:
        # Script-level boundary: report the problem and signal failure
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Translate the boolean outcome into a process exit status:
    # 0 when the test succeeded, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)