Cycles/test/check_data.py
2025-05-28 18:26:51 +08:00

54 lines
1.8 KiB
Python

#!/usr/bin/env python3
"""
Check BTC data file format.
"""
import pandas as pd
def _parse_timestamps(values):
    """Convert a timestamp column to pandas datetimes.

    Numeric columns are treated as Unix epoch values. Without an explicit
    ``unit`` pandas interprets numbers as nanoseconds, which maps
    second-resolution epochs onto dates in 1970. The unit is therefore
    inferred from the magnitude of the largest value (heuristic: values
    below 1e11 are seconds, below 1e14 milliseconds, below 1e17
    microseconds, otherwise nanoseconds).

    Non-numeric columns (e.g. ISO date strings) are passed to
    ``pd.to_datetime`` unchanged.
    """
    if pd.api.types.is_numeric_dtype(values):
        magnitude = values.abs().max()
        if magnitude < 1e11:
            unit = "s"
        elif magnitude < 1e14:
            unit = "ms"
        elif magnitude < 1e17:
            unit = "us"
        else:
            # Also reached when every value is NaN (comparisons are False).
            unit = "ns"
        return pd.to_datetime(values, unit=unit)
    return pd.to_datetime(values)


def check_data(path: str = './data/btcusd_1-min_data.csv') -> bool:
    """Print a quick format report for a BTC price CSV file.

    The report covers column names, shape, the first rows and dtypes of a
    10-row preview, any columns whose name looks like a timestamp, and the
    date range / unique dates found in the first 1000 rows of the first
    such column.

    Args:
        path: CSV file to inspect. Defaults to the original hard-coded
            location so existing callers keep working.

    Returns:
        True when the report completed, False when any error occurred
        (the error message is printed rather than raised).
    """
    preview_rows = 10
    sample_rows = 1000
    try:
        print("📊 Checking BTC data file format...")

        # Only a handful of rows are needed to inspect the layout.
        df = pd.read_csv(path, nrows=preview_rows)
        print(f"📋 Columns: {list(df.columns)}")
        print(f"📈 Shape: {df.shape}")
        print("🔍 First 5 rows:")
        print(df.head())
        print("📊 Data types:")
        print(df.dtypes)

        # Find timestamp-like columns once; "timestamp" is already matched
        # by the "time" keyword. str() guards against non-string headers.
        print("\n🕐 Looking for timestamp columns...")
        timestamp_cols = [
            col for col in df.columns
            if any(word in str(col).lower() for word in ("time", "date"))
        ]
        for col in timestamp_cols:
            print(f" Found: {col}")
            print(f" Sample values: {df[col].head(3).tolist()}")

        print("\n📅 Checking date range...")
        if timestamp_cols:
            timestamp_col = timestamp_cols[0]
            # Load a larger sample so the date range is meaningful.
            df_sample = pd.read_csv(path, nrows=sample_rows)
            timestamps = _parse_timestamps(df_sample[timestamp_col])
            print(f" Date range (first {sample_rows} rows): {timestamps.min()} to {timestamps.max()}")
            # Drop missing values so sorting never compares NaT with dates.
            unique_dates = timestamps.dropna().dt.date.unique()
            print(f" Unique dates in sample: {sorted(unique_dates)[:10]}")  # First 10 dates
        return True
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"❌ Error: {e}")
        return False
if __name__ == "__main__":
    # Propagate the check result as the process exit status so shells and
    # CI can detect a failed check (0 = success, 1 = failure).
    raise SystemExit(0 if check_data() else 1)