#!/usr/bin/env python3
"""
Check BTC data file format.
"""

import pandas as pd

# Substrings that mark a column name as timestamp-like (matched case-insensitively).
_TIMESTAMP_HINTS = ('time', 'date', 'timestamp')


def _find_timestamp_columns(df):
    """Return the columns of *df* whose name contains a timestamp hint."""
    return [
        col for col in df.columns
        if any(hint in str(col).lower() for hint in _TIMESTAMP_HINTS)
    ]


def _parse_timestamps(series):
    """Convert *series* to datetimes, treating numeric values as Unix epochs.

    ``pd.to_datetime`` interprets bare numbers as nanoseconds, which maps
    epoch-second data onto 1970-01-01.  For numeric columns the unit is
    therefore guessed from the magnitude of the values (heuristic: each
    threshold corresponds to roughly year 5138 in the smaller unit and 1973
    in the next larger one).  Non-numeric columns are parsed as date strings.
    """
    if not pd.api.types.is_numeric_dtype(series):
        return pd.to_datetime(series)

    peak = series.abs().max()
    if peak < 1e11:
        unit = 's'
    elif peak < 1e14:
        unit = 'ms'
    elif peak < 1e17:
        unit = 'us'
    else:
        # Also reached when every value is NaN; the result is then all NaT.
        unit = 'ns'
    return pd.to_datetime(series, unit=unit)


def check_data(path: str = './data/btcusd_1-min_data.csv') -> bool:
    """Print a quick format report for a BTC price CSV file.

    Reads the first 10 rows to report columns, shape, a preview and dtypes,
    lists every timestamp-like column, and then reads the first 1000 rows to
    report the date range and unique dates of the first such column.

    Args:
        path: Location of the CSV file to inspect.  Defaults to the original
            hard-coded data file.

    Returns:
        True when the report completed, False when any error occurred (the
        error message is printed rather than raised).
    """
    try:
        print("📊 Checking BTC data file format...")

        # A small preview is enough to inspect the layout.
        df = pd.read_csv(path, nrows=10)

        print(f"📋 Columns: {list(df.columns)}")
        print(f"📈 Shape: {df.shape}")
        print("🔍 First 5 rows:")
        print(df.head())
        print("📊 Data types:")
        print(df.dtypes)

        # Detect timestamp-like columns once and reuse the result below.
        print("\n🕐 Looking for timestamp columns...")
        timestamp_cols = _find_timestamp_columns(df)
        for col in timestamp_cols:
            print(f"  Found: {col}")
            print(f"  Sample values: {df[col].head(3).tolist()}")

        print("\n📅 Checking date range...")
        if timestamp_cols:
            timestamp_col = timestamp_cols[0]

            # Load a larger sample so the date range is meaningful.
            df_sample = pd.read_csv(path, nrows=1000)
            stamps = _parse_timestamps(df_sample[timestamp_col])
            print(f"  Date range (first 1000 rows): {stamps.min()} to {stamps.max()}")

            unique_dates = stamps.dt.date.unique()
            print(f"  Unique dates in sample: {sorted(unique_dates)[:10]}")  # First 10 dates

        return True

    except Exception as e:
        # Script-level boundary: report any failure (missing file, parse
        # error, ...) and signal it through the return value.
        print(f"❌ Error: {e}")
        return False

if __name__ == "__main__":
    # Turn the boolean result into the process exit status so shells and CI
    # jobs can tell a failed check (1) from a successful one (0).
    raise SystemExit(0 if check_data() else 1)