54 lines
1.8 KiB
Python
54 lines
1.8 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Check BTC data file format.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pandas as pd
|
||
|
|
|
||
|
|
_DEFAULT_DATA_PATH = './data/btcusd_1-min_data.csv'

# Substrings that mark a column as timestamp-like. 'timestamp' is not listed
# separately because any name containing it also contains 'time'.
_TIMESTAMP_KEYWORDS = ('time', 'date')


def _find_timestamp_columns(columns):
    """Return the column names that contain a timestamp-like keyword."""
    return [
        col for col in columns
        if any(word in str(col).lower() for word in _TIMESTAMP_KEYWORDS)
    ]


def _parse_timestamps(series):
    """Convert a column to datetimes, treating numeric values as Unix epochs.

    ``pd.to_datetime`` interprets bare numbers as nanoseconds since the epoch,
    which maps ordinary second-resolution Unix timestamps to 1970. For numeric
    columns the unit is therefore inferred from the magnitude of the values
    (heuristic: seconds < 1e11 <= milliseconds < 1e14 <= microseconds < 1e17
    <= nanoseconds).
    """
    if not pd.api.types.is_numeric_dtype(series):
        return pd.to_datetime(series)

    magnitude = series.abs().max()
    if pd.isna(magnitude) or magnitude < 1e11:
        unit = 's'
    elif magnitude < 1e14:
        unit = 'ms'
    elif magnitude < 1e17:
        unit = 'us'
    else:
        unit = 'ns'
    return pd.to_datetime(series, unit=unit)


def check_data(path: str = _DEFAULT_DATA_PATH) -> bool:
    """Print a summary of the CSV file's layout and its leading date range.

    Reads the first 10 rows to report columns, shape, a preview and dtypes,
    lists every timestamp-like column, and then, if one exists, reads the
    first 1000 values of the first such column to report the covered date
    range and the first 10 unique dates.

    Args:
        path: Location of the CSV file to inspect. Defaults to the BTC/USD
            1-minute data file under ``./data``.

    Returns:
        True if the inspection completed, False if any error occurred (the
        error message is printed rather than raised).
    """
    try:
        print("📊 Checking BTC data file format...")

        # Load first few rows
        df = pd.read_csv(path, nrows=10)

        print(f"📋 Columns: {list(df.columns)}")
        print(f"📈 Shape: {df.shape}")
        print("🔍 First 5 rows:")
        print(df.head())
        print("📊 Data types:")
        print(df.dtypes)

        # Check for timestamp-like columns
        print("\n🕐 Looking for timestamp columns...")
        timestamp_cols = _find_timestamp_columns(df.columns)
        for col in timestamp_cols:
            print(f" Found: {col}")
            print(f" Sample values: {df[col].head(3).tolist()}")

        # Check date range
        print("\n📅 Checking date range...")
        if timestamp_cols:
            timestamp_col = timestamp_cols[0]

            # Load more data to check date range; only the timestamp column
            # is needed for this step.
            df_sample = pd.read_csv(path, nrows=1000, usecols=[timestamp_col])
            timestamps = _parse_timestamps(df_sample[timestamp_col])
            print(f" Date range (first 1000 rows): {timestamps.min()} to {timestamps.max()}")

            # Check unique dates. NaT entries are dropped first because they
            # cannot be ordered against real dates by sorted().
            unique_dates = timestamps.dropna().dt.date.unique()
            print(f" Unique dates in sample: {sorted(unique_dates)[:10]}")  # First 10 dates

        return True

    except Exception as e:
        # Deliberate catch-all: this is a diagnostic script, so any failure
        # (missing file, malformed CSV, unparseable dates) is reported and
        # signalled through the return value instead of a traceback.
        print(f"❌ Error: {e}")
        return False
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # check_data() returns True on success and False on failure; turn that
    # into the process exit status so shells and CI can detect a failed check.
    raise SystemExit(0 if check_data() else 1)
|