343 lines
13 KiB
Python
343 lines
13 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Real data alignment test with BTC data limited to 4 hours for clear visualization.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pandas as pd
|
||
|
|
import matplotlib.pyplot as plt
|
||
|
|
import matplotlib.dates as mdates
|
||
|
|
from matplotlib.patches import Rectangle
|
||
|
|
import sys
|
||
|
|
import os
|
||
|
|
|
||
|
|
# Add the project root to Python path
|
||
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||
|
|
|
||
|
|
from IncrementalTrader.utils import aggregate_minute_data_to_timeframe, parse_timeframe_to_minutes
|
||
|
|
|
||
|
|
|
||
|
|
def _standardize_ohlcv_columns(df):
    """Rename OHLCV-like columns to 'open', 'high', 'low', 'close', 'volume'.

    A column is matched by case-insensitive substring, testing the keywords in
    the order open, high, low, close, volume. Each canonical name is assigned
    at most once (first matching column wins, and a column that already has
    the exact canonical name takes precedence), so inputs such as
    ``Volume_(BTC)`` + ``Volume_(Currency)`` do not end up as two columns both
    called ``volume``.
    """
    keywords = ('open', 'high', 'low', 'close', 'volume')
    # Canonical names already present are considered claimed.
    claimed = {col for col in df.columns if col in keywords}
    mapping = {}
    for col in df.columns:
        if col in keywords:
            continue
        col_lower = str(col).lower()
        for key in keywords:
            if key in col_lower:
                if key not in claimed:
                    mapping[col] = key
                    claimed.add(key)
                # Only the first matching keyword is considered per column.
                break
    return df.rename(columns=mapping)


def _find_most_volatile_start_hour(day_df, hours, window_hours=4, min_rows=200):
    """Return ``(start_hour, volatility)`` of the most volatile window in a day.

    Every window ``[start_hour, start_hour + window_hours)`` that fits inside
    the day and holds at least ``min_rows`` rows is scored by
    ``(max high - min low) / mean close``. Ties keep the earliest window.
    Returns ``(None, 0)`` when no window qualifies or none has positive
    volatility.
    """
    best_start_hour = None
    best_volatility = 0
    for start_hour in range(0, 24 - window_hours + 1):
        in_window = (hours >= start_hour) & (hours < start_hour + window_hours)
        window_data = day_df[in_window]
        if len(window_data) < min_rows:
            continue

        price_range = window_data['high'].max() - window_data['low'].min()
        avg_price = window_data['close'].mean()
        volatility = price_range / avg_price if avg_price > 0 else 0

        if volatility > best_volatility:
            best_volatility = volatility
            best_start_hour = start_hour

    return best_start_hour, best_volatility


def load_btc_data_4hours(file_path: str) -> list:
    """
    Load 4 hours of BTC minute data from CSV file.

    The CSV is expected to provide a ``Timestamp`` column holding Unix seconds
    and open/high/low/close/volume columns (matched case-insensitively by
    substring). Rows with a non-positive price are dropped. The most recent
    calendar day with at least 1000 rows is selected, and within it the
    4-hour window (aligned to whole hours) with the largest relative price
    range is returned, capped at 240 rows. If no window has at least 200
    rows, the first 240 rows of the day are used instead.

    Args:
        file_path: Path to the CSV file

    Returns:
        List of minute OHLCV data dictionaries with keys 'timestamp', 'open',
        'high', 'low', 'close' and 'volume'. An empty list is returned when no
        day has enough data or when any error occurs (the error and its
        traceback are printed, not raised).
    """
    print(f"📊 Loading 4 hours of BTC data from {file_path}")

    try:
        df = pd.read_csv(file_path)
        print(f" 📈 Loaded {len(df)} total rows")

        # Handle Unix timestamp format
        if 'Timestamp' in df.columns:
            print(" 🕐 Converting Unix timestamps...")
            df['timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

        df = _standardize_ohlcv_columns(df)

        # Remove rows with zero or invalid prices
        initial_len = len(df)
        valid_prices = (
            (df['open'] > 0) & (df['high'] > 0) & (df['low'] > 0) & (df['close'] > 0)
        )
        df = df[valid_prices]
        if len(df) < initial_len:
            print(f" 🧹 Removed {initial_len - len(df)} rows with invalid prices")

        df = df.sort_values('timestamp')

        print(" 📅 Finding a good 4-hour period...")

        # Group rows per calendar day. The day key is kept as a separate
        # Series (not a new column) so the filtered frame is never mutated.
        day = df['timestamp'].dt.normalize()
        rows_per_day = df.groupby(day).size()
        good_dates = rows_per_day[rows_per_day >= 1000].index  # Dates with lots of data

        if len(good_dates) == 0:
            print(" ❌ No dates with sufficient data found")
            return []

        # groupby sorts its keys, so the last entry is the most recent day.
        selected_date = good_dates[-1]
        df_date = df[day == selected_date]
        print(f" ✅ Selected date: {selected_date.date()} with {len(df_date)} data points")

        hours = df_date['timestamp'].dt.hour
        best_start_hour, best_volatility = _find_most_volatile_start_hour(df_date, hours)

        if best_start_hour is None:
            # Fallback: just take first 4 hours of data
            df_4h = df_date.head(240)  # 4 hours = 240 minutes
            print(" 📊 Using first 4 hours as fallback")
        else:
            end_hour = best_start_hour + 4
            in_window = (hours >= best_start_hour) & (hours < end_hour)
            df_4h = df_date[in_window].head(240)  # Limit to 240 minutes max
            print(f" 📊 Selected 4-hour window: {best_start_hour:02d}:00 - {end_hour:02d}:00")
            print(f" 📈 Price volatility: {best_volatility:.4f}")

        print(f" ✅ Final dataset: {len(df_4h)} rows from {df_4h['timestamp'].min()} to {df_4h['timestamp'].max()}")

        # Convert to list of dictionaries
        fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
        return [
            {
                'timestamp': row.timestamp,
                'open': float(row.open),
                'high': float(row.high),
                'low': float(row.low),
                'close': float(row.close),
                'volume': float(row.volume),
            }
            for row in df_4h[fields].itertuples(index=False)
        ]

    except Exception as e:
        # Best-effort loader for a demo script: report the problem and let the
        # caller treat an empty list as "no data".
        print(f" ❌ Error loading data: {e}")
        import traceback
        traceback.print_exc()
        return []
|
||
|
|
|
||
|
|
|
||
|
|
def plot_timeframe_bars(ax, data, timeframe, color, alpha=0.7, show_labels=True):
    """Draw each aggregated bar as a candle spanning its whole time period.

    Bars are interpreted in "end timestamp" mode: a bar stamped T covers
    ``[T - timeframe, T]``. For every bar a rectangle (open/close body) is
    added to ``ax``, a vertical high-low wick is drawn through the middle of
    the period, and, for the "5min" and "15min" timeframes when
    ``show_labels`` is true, a numbered label is placed just above the high.

    Args:
        ax: Matplotlib axes to draw on.
        data: Sequence of bar dicts with 'timestamp', 'open', 'high', 'low'
            and 'close' entries. Nothing is drawn when it is empty.
        timeframe: Timeframe string handed to ``parse_timeframe_to_minutes``.
        color: Fill colour of the bodies. The special value 'green' switches
            to a light green / light coral fill depending on bar direction.
        alpha: Opacity applied to bodies and wicks.
        show_labels: Whether labels may be drawn at all.
    """
    if not data:
        return

    span = pd.Timedelta(minutes=parse_timeframe_to_minutes(timeframe))
    label_bars = show_labels and timeframe in ("5min", "15min")
    directional_fill = color == 'green'

    for number, candle in enumerate(data, start=1):
        period_end = candle['timestamp']
        period_start = period_end - span  # end-timestamp mode
        o, c = candle['open'], candle['close']
        hi, lo = candle['high'], candle['low']

        # Edge colour always reflects direction; the fill only does so when
        # the caller asked for the 'green' palette.
        if c >= o:
            outline = 'darkgreen'
            fill = 'lightgreen' if directional_fill else color
        else:
            outline = 'darkred'
            fill = 'lightcoral' if directional_fill else color

        # Body: full period wide, open-to-close tall.
        body = Rectangle(
            (period_start, min(o, c)),
            period_end - period_start,
            abs(c - o),
            facecolor=fill,
            edgecolor=outline,
            alpha=alpha,
            linewidth=1,
        )
        ax.add_patch(body)

        # Wick: low-to-high line through the middle of the period.
        midpoint = period_start + (period_end - period_start) / 2
        ax.plot([midpoint, midpoint], [lo, hi],
                color=outline, linewidth=2, alpha=alpha)

        if label_bars:
            ax.text(midpoint, hi + (hi * 0.001), f"{timeframe}\n#{number}",
                    ha='center', va='bottom', fontsize=7, fontweight='bold')
|
||
|
|
|
||
|
|
|
||
|
|
def _print_alignment_report(bars_5m, bars_15m):
    """Print, for every 15min bar, the 5min bars that lie fully inside it.

    Bars use end-timestamp semantics: a bar stamped T covers
    ``[T - timeframe, T]``. A 15min bar is reported as aligned when exactly
    three 5min bars fall inside its period.
    """
    five = pd.Timedelta(minutes=5)
    fifteen = pd.Timedelta(minutes=15)

    for i, bar_15m in enumerate(bars_15m):
        print(f"\n15min bar {i+1}: {bar_15m['timestamp']} | ${bar_15m['open']:.2f} -> ${bar_15m['close']:.2f}")
        bar_15m_end = bar_15m['timestamp']
        bar_15m_start = bar_15m_end - fifteen

        contained_5m = [
            bar_5m for bar_5m in bars_5m
            if bar_15m_start <= bar_5m['timestamp'] - five
            and bar_5m['timestamp'] <= bar_15m_end
        ]

        print(f" Contains {len(contained_5m)} x 5min bars:")
        for j, bar_5m in enumerate(contained_5m):
            print(f" {j+1}. {bar_5m['timestamp']} | ${bar_5m['open']:.2f} -> ${bar_5m['close']:.2f}")

        if len(contained_5m) != 3:
            print(f" ❌ ALIGNMENT ISSUE: Expected 3 bars, found {len(contained_5m)}")
        else:
            print(" ✅ Alignment OK")


def create_real_data_alignment_visualization(minute_data):
    """Create a clear visualization of timeframe alignment with real data.

    The minute bars are aggregated to 5min, 15min, 30min and 1h bars with
    ``aggregate_minute_data_to_timeframe(..., "end")`` and drawn on a single
    axes, largest timeframe first so the smaller ones sit in the foreground.
    A textual check of how many 5min bars each 15min bar contains is printed
    afterwards.

    Args:
        minute_data: List of minute bar dicts with at least 'timestamp' and
            'close' entries (the full bar schema is consumed by the
            aggregation helper).

    Returns:
        The matplotlib figure, or None when ``minute_data`` is empty.
    """
    print("🎯 Creating Real Data Timeframe Alignment Visualization")
    print("=" * 60)

    if not minute_data:
        print("❌ No data to visualize")
        return None

    print(f"📊 Using {len(minute_data)} minute data points")
    print(f"📅 Range: {minute_data[0]['timestamp']} to {minute_data[-1]['timestamp']}")

    # Show price range
    prices = [d['close'] for d in minute_data]
    print(f"💰 Price range: ${min(prices):.2f} - ${max(prices):.2f}")

    # Per-timeframe drawing style: (fill colour, opacity).
    styles = {
        "5min": ('red', 0.8),
        "15min": ('green', 0.6),
        "30min": ('blue', 0.4),
        "1h": ('purple', 0.2),
    }
    timeframes = list(styles)

    # Aggregate to different timeframes
    aggregated_data = {}
    for tf in timeframes:
        aggregated_data[tf] = aggregate_minute_data_to_timeframe(minute_data, tf, "end")
        print(f" {tf}: {len(aggregated_data[tf])} bars")

    # Create visualization
    fig, ax = plt.subplots(1, 1, figsize=(18, 10))
    fig.suptitle('Real BTC Data - Timeframe Alignment Visualization\n(4 hours of real market data)',
                 fontsize=16, fontweight='bold')

    # Plot timeframes from largest to smallest (background to foreground)
    for tf in reversed(timeframes):
        color, alpha = styles[tf]
        show_labels = tf in ("5min", "15min")  # Only label smaller timeframes for clarity
        plot_timeframe_bars(ax, aggregated_data[tf], tf, color, alpha, show_labels)

    # Format the plot
    ax.set_ylabel('Price (USD)', fontsize=12)
    ax.set_xlabel('Time', fontsize=12)
    ax.grid(True, alpha=0.3)

    # Format x-axis
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))
    ax.xaxis.set_minor_locator(mdates.MinuteLocator(interval=30))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

    # Legend goes in the upper-right corner: the explanation box below is
    # anchored at the upper left, and a legend in that same corner would be
    # drawn on top of it.
    legend_elements = [
        Rectangle((0, 0), 1, 1,
                  facecolor=color,
                  alpha=alpha,
                  label=f"{tf} ({len(aggregated_data[tf])} bars)")
        for tf, (color, alpha) in styles.items()
    ]
    ax.legend(handles=legend_elements, loc='upper right', fontsize=10)

    # Add explanation
    explanation = ("Real BTC market data showing timeframe alignment.\n"
                   "Green bars = bullish (close > open), Red bars = bearish (close < open).\n"
                   "Each bar spans its full time period - smaller timeframes fit inside larger ones.")
    ax.text(0.02, 0.98, explanation, transform=ax.transAxes,
            verticalalignment='top', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.9))

    plt.tight_layout()

    # Print alignment verification
    print("\n🔍 Alignment Verification:")
    _print_alignment_report(aggregated_data["5min"], aggregated_data["15min"])

    return fig
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Run the real-data alignment demo end to end.

    Looks for ``./data/btcusd_1-min_data.csv`` relative to the current
    working directory, loads a 4-hour slice from it, builds the alignment
    figure and shows it.

    Returns:
        True when the demo ran to completion; False when the data file is
        missing, no data could be loaded, or an exception was raised (the
        error and traceback are printed).
    """
    print("🚀 Real Data Timeframe Alignment Test")
    print("=" * 45)

    # Configuration
    data_file = "./data/btcusd_1-min_data.csv"

    # Bail out early when there is nothing to load.
    if not os.path.exists(data_file):
        print(f"❌ Data file not found: {data_file}")
        print("Please ensure the BTC data file exists in the ./data/ directory")
        return False

    try:
        # Load 4 hours of real data
        minute_data = load_btc_data_4hours(data_file)
        if not minute_data:
            print("❌ Failed to load data")
            return False

        # Build the chart and display it if one was produced.
        fig = create_real_data_alignment_visualization(minute_data)
        if fig:
            plt.show()

        summary_lines = (
            "\n✅ Real data alignment test completed!",
            "📊 In the chart, you should see:",
            " - Real BTC price movements over 4 hours",
            " - Each 15min bar contains exactly 3 x 5min bars",
            " - Each 30min bar contains exactly 6 x 5min bars",
            " - Each 1h bar contains exactly 12 x 5min bars",
            " - All bars are properly aligned with no gaps or overlaps",
            " - Green bars = bullish periods, Red bars = bearish periods",
        )
        for line in summary_lines:
            print(line)

        return True

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Map main()'s boolean result onto a process exit status (0 = success).
    exit_code = 0 if main() else 1
    sys.exit(exit_code)
|