#!/usr/bin/env python3
"""
Real data alignment test with BTC data limited to 4 hours for clear visualization.

The script loads minute-level BTC data from a CSV file, picks the most
volatile 4-hour window of the most recent well-populated day, aggregates it
to several timeframes, and draws the resulting bars on top of each other so
that their alignment can be inspected visually and in the console output.
"""

import os
import sys
import traceback

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.patches import Rectangle

# Add the project root to Python path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from IncrementalTrader.utils import aggregate_minute_data_to_timeframe, parse_timeframe_to_minutes

# Canonical price/volume column names, in the order they are matched against
# the (lower-cased) CSV column names.
_OHLCV_COLUMNS = ('open', 'high', 'low', 'close', 'volume')


def load_btc_data_4hours(file_path: str) -> list:
    """
    Load 4 hours of BTC minute data from CSV file.

    The most recent date with at least 1000 rows is selected; within that
    date the 4-hour window (aligned to whole hours) with the largest
    relative price range is used. If no window has at least 200 rows, the
    first 240 rows of the date are used instead.

    Args:
        file_path: Path to the CSV file

    Returns:
        List of minute OHLCV data dictionaries with the keys 'timestamp',
        'open', 'high', 'low', 'close' and 'volume'. An empty list is
        returned when the data cannot be loaded or no suitable date exists.
    """
    print(f"📊 Loading 4 hours of BTC data from {file_path}")

    try:
        # Load the CSV file
        df = pd.read_csv(file_path)
        print(f" 📈 Loaded {len(df)} total rows")

        # Handle Unix timestamp format
        if 'Timestamp' in df.columns:
            print(" 🕐 Converting Unix timestamps...")
            df['timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

        # Standardize column names. Only the first source column matching a
        # given target is renamed: mapping two columns to the same name would
        # create duplicate columns and break the scalar operations below.
        column_mapping = {}
        for col in df.columns:
            col_lower = str(col).lower()
            for target in _OHLCV_COLUMNS:
                if target in col_lower:
                    if target not in column_mapping.values():
                        column_mapping[col] = target
                    break
        df = df.rename(columns=column_mapping)

        # Fail with a clear message instead of a bare KeyError further down.
        missing = [c for c in ('timestamp', *_OHLCV_COLUMNS) if c not in df.columns]
        if missing:
            print(f" ❌ Missing required columns: {', '.join(missing)}")
            return []

        # Remove rows with zero or invalid prices
        initial_len = len(df)
        df = df[(df['open'] > 0) & (df['high'] > 0) & (df['low'] > 0) & (df['close'] > 0)]
        if len(df) < initial_len:
            print(f" 🧹 Removed {initial_len - len(df)} rows with invalid prices")

        # Sort by timestamp
        df = df.sort_values('timestamp')

        # Find a good 4-hour period with active trading
        print(" 📅 Finding a good 4-hour period...")

        # Group by date and find dates with good data
        df['date'] = df['timestamp'].dt.date
        date_counts = df.groupby('date').size()
        good_dates = date_counts[date_counts >= 1000].index  # Dates with lots of data

        if good_dates.empty:
            print(" ❌ No dates with sufficient data found")
            return []

        # Pick a recent date with good data (the index is sorted by date)
        selected_date = good_dates[-1]
        df_date = df[df['date'] == selected_date].copy()
        print(f" ✅ Selected date: {selected_date} with {len(df_date)} data points")

        # Find a 4-hour period with good price movement
        # Look for periods with reasonable price volatility
        df_date['hour'] = df_date['timestamp'].dt.hour

        best_start_hour = None
        best_volatility = 0

        # Try different 4-hour windows
        for start_hour in range(0, 21):  # 0-20 (so 4-hour window fits in 24h)
            end_hour = start_hour + 4
            window_data = df_date[
                (df_date['hour'] >= start_hour) & (df_date['hour'] < end_hour)
            ]

            if len(window_data) >= 200:  # At least 200 minutes of data
                # Calculate volatility as price range relative to mean close
                price_range = window_data['high'].max() - window_data['low'].min()
                avg_price = window_data['close'].mean()
                volatility = price_range / avg_price if avg_price > 0 else 0

                if volatility > best_volatility:
                    best_volatility = volatility
                    best_start_hour = start_hour

        if best_start_hour is None:
            # Fallback: just take first 4 hours of data
            df_4h = df_date.head(240)  # 4 hours = 240 minutes
            print(" 📊 Using first 4 hours as fallback")
        else:
            end_hour = best_start_hour + 4
            df_4h = df_date[
                (df_date['hour'] >= best_start_hour) & (df_date['hour'] < end_hour)
            ].head(240)  # Limit to 240 minutes max
            print(f" 📊 Selected 4-hour window: {best_start_hour:02d}:00 - {end_hour:02d}:00")
            print(f" 📈 Price volatility: {best_volatility:.4f}")

        print(f" ✅ Final dataset: {len(df_4h)} rows from {df_4h['timestamp'].min()} to {df_4h['timestamp'].max()}")

        # Convert to list of dictionaries
        records = df_4h[['timestamp', *_OHLCV_COLUMNS]].itertuples(index=False, name=None)
        return [
            {
                'timestamp': ts,
                'open': float(open_),
                'high': float(high),
                'low': float(low),
                'close': float(close),
                'volume': float(volume),
            }
            for ts, open_, high, low, close, volume in records
        ]

    except Exception as e:
        # Script-level boundary: report the problem and signal failure with [].
        print(f" ❌ Error loading data: {e}")
        traceback.print_exc()
        return []


def plot_timeframe_bars(ax, data, timeframe, color, alpha=0.7, show_labels=True):
    """
    Plot timeframe bars with clear boundaries.

    Each bar is drawn as a rectangle (open/close body) spanning the full
    period that ends at the bar's timestamp, plus a vertical high-low wick
    at the centre of that period.

    Args:
        ax: Matplotlib axes to draw on
        data: List of bar dictionaries with 'timestamp', 'open', 'high',
            'low' and 'close' keys
        timeframe: Timeframe string (e.g. "5min") passed to
            parse_timeframe_to_minutes to obtain the bar width
        color: Face colour of the bars; the special value 'green' switches
            to bullish/bearish tinting of the bar body
        alpha: Opacity of bodies and wicks
        show_labels: Whether to annotate bars (only done for the "5min" and
            "15min" timeframes)
    """
    if not data:
        return

    timeframe_minutes = parse_timeframe_to_minutes(timeframe)
    bar_width = pd.Timedelta(minutes=timeframe_minutes)
    label_bars = show_labels and timeframe in ["5min", "15min"]

    for i, bar in enumerate(data):
        open_price = bar['open']
        high_price = bar['high']
        low_price = bar['low']
        close_price = bar['close']

        # Calculate bar boundaries (end timestamp mode): the bar's timestamp
        # marks the end of its period.
        bar_end = bar['timestamp']
        bar_start = bar_end - bar_width

        # Draw the bar as a rectangle spanning the full time period
        body_height = abs(close_price - open_price)
        body_bottom = min(open_price, close_price)

        # Determine color based on bullish/bearish
        if close_price >= open_price:
            # Bullish - use green tint
            bar_color = 'lightgreen' if color == 'green' else color
            edge_color = 'darkgreen'
        else:
            # Bearish - use red tint
            bar_color = 'lightcoral' if color == 'green' else color
            edge_color = 'darkred'

        # Bar body
        rect = Rectangle((bar_start, body_bottom),
                         bar_end - bar_start, body_height,
                         facecolor=bar_color, edgecolor=edge_color,
                         alpha=alpha, linewidth=1)
        ax.add_patch(rect)

        # High-low wick at center
        bar_center = bar_start + (bar_end - bar_start) / 2
        ax.plot([bar_center, bar_center], [low_price, high_price],
                color=edge_color, linewidth=2, alpha=alpha)

        # Add labels for smaller timeframes
        if label_bars:
            ax.text(bar_center, high_price + (high_price * 0.001),
                    f"{timeframe}\n#{i+1}", ha='center', va='bottom',
                    fontsize=7, fontweight='bold')


def _print_alignment_verification(bars_5m, bars_15m):
    """Print, for every 15min bar, the 5min bars whose period lies inside it."""
    print("\n🔍 Alignment Verification:")

    span_5m = pd.Timedelta(minutes=5)
    span_15m = pd.Timedelta(minutes=15)

    for i, bar_15m in enumerate(bars_15m, start=1):
        print(f"\n15min bar {i}: {bar_15m['timestamp']} | ${bar_15m['open']:.2f} -> ${bar_15m['close']:.2f}")
        bar_15m_end = bar_15m['timestamp']
        bar_15m_start = bar_15m_end - span_15m

        # A 5min bar is contained when its whole [start, end] period falls
        # within the 15min bar's period.
        contained_5m = [
            bar_5m for bar_5m in bars_5m
            if bar_15m_start <= bar_5m['timestamp'] - span_5m
            and bar_5m['timestamp'] <= bar_15m_end
        ]

        print(f" Contains {len(contained_5m)} x 5min bars:")
        for j, bar_5m in enumerate(contained_5m, start=1):
            print(f" {j}. {bar_5m['timestamp']} | ${bar_5m['open']:.2f} -> ${bar_5m['close']:.2f}")

        if len(contained_5m) != 3:
            print(f" ❌ ALIGNMENT ISSUE: Expected 3 bars, found {len(contained_5m)}")
        else:
            print(" ✅ Alignment OK")


def create_real_data_alignment_visualization(minute_data):
    """
    Create a clear visualization of timeframe alignment with real data.

    The minute data is aggregated to 5min, 15min, 30min and 1h bars (the
    aggregator is called in "end" mode), all timeframes are drawn on one
    axes from largest to smallest, and a 5min-in-15min containment report
    is printed to the console.

    Args:
        minute_data: List of minute OHLCV dictionaries as returned by
            load_btc_data_4hours

    Returns:
        The Matplotlib figure, or None when minute_data is empty.
    """
    print("🎯 Creating Real Data Timeframe Alignment Visualization")
    print("=" * 60)

    if not minute_data:
        print("❌ No data to visualize")
        return None

    print(f"📊 Using {len(minute_data)} minute data points")
    print(f"📅 Range: {minute_data[0]['timestamp']} to {minute_data[-1]['timestamp']}")

    # Show price range
    prices = [d['close'] for d in minute_data]
    print(f"💰 Price range: ${min(prices):.2f} - ${max(prices):.2f}")

    # Aggregate to different timeframes
    timeframes = ["5min", "15min", "30min", "1h"]
    colors = ['red', 'green', 'blue', 'purple']
    alphas = [0.8, 0.6, 0.4, 0.2]
    styles = dict(zip(timeframes, zip(colors, alphas)))

    aggregated_data = {}
    for tf in timeframes:
        aggregated_data[tf] = aggregate_minute_data_to_timeframe(minute_data, tf, "end")
        print(f" {tf}: {len(aggregated_data[tf])} bars")

    # Create visualization
    fig, ax = plt.subplots(1, 1, figsize=(18, 10))
    fig.suptitle('Real BTC Data - Timeframe Alignment Visualization\n(4 hours of real market data)',
                 fontsize=16, fontweight='bold')

    # Plot timeframes from largest to smallest (background to foreground)
    for tf in reversed(timeframes):
        color, alpha = styles[tf]
        show_labels = tf in ["5min", "15min"]  # Only label smaller timeframes for clarity
        plot_timeframe_bars(ax, aggregated_data[tf], tf, color, alpha, show_labels)

    # Format the plot
    ax.set_ylabel('Price (USD)', fontsize=12)
    ax.set_xlabel('Time', fontsize=12)
    ax.grid(True, alpha=0.3)

    # Format x-axis
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))
    ax.xaxis.set_minor_locator(mdates.MinuteLocator(interval=30))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

    # Add legend (proxy rectangles, one per timeframe)
    legend_elements = [
        Rectangle((0, 0), 1, 1, facecolor=color, alpha=alpha,
                  label=f"{tf} ({len(aggregated_data[tf])} bars)")
        for tf, (color, alpha) in styles.items()
    ]
    ax.legend(handles=legend_elements, loc='upper left', fontsize=10)

    # Add explanation
    explanation = ("Real BTC market data showing timeframe alignment.\n"
                   "Green bars = bullish (close > open), Red bars = bearish (close < open).\n"
                   "Each bar spans its full time period - smaller timeframes fit inside larger ones.")
    ax.text(0.02, 0.98, explanation, transform=ax.transAxes,
            verticalalignment='top', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.9))

    plt.tight_layout()

    # Print alignment verification
    _print_alignment_verification(aggregated_data["5min"], aggregated_data["15min"])

    return fig


def main():
    """
    Main function.

    Returns:
        True when the data was loaded and the visualization step ran,
        False when the data file is missing, loading failed, or an
        exception occurred.
    """
    print("🚀 Real Data Timeframe Alignment Test")
    print("=" * 45)

    # Configuration
    data_file = "./data/btcusd_1-min_data.csv"

    # Check if data file exists
    if not os.path.exists(data_file):
        print(f"❌ Data file not found: {data_file}")
        print("Please ensure the BTC data file exists in the ./data/ directory")
        return False

    try:
        # Load 4 hours of real data
        minute_data = load_btc_data_4hours(data_file)
        if not minute_data:
            print("❌ Failed to load data")
            return False

        # Create visualization
        fig = create_real_data_alignment_visualization(minute_data)
        if fig:
            plt.show()

        print("\n✅ Real data alignment test completed!")
        print("📊 In the chart, you should see:")
        print(" - Real BTC price movements over 4 hours")
        print(" - Each 15min bar contains exactly 3 x 5min bars")
        print(" - Each 30min bar contains exactly 6 x 5min bars")
        print(" - Each 1h bar contains exactly 12 x 5min bars")
        print(" - All bars are properly aligned with no gaps or overlaps")
        print(" - Green bars = bullish periods, Red bars = bearish periods")
        return True

    except Exception as e:
        # Script-level boundary: report the failure and exit non-zero.
        print(f"❌ Error: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)