CryptoMarketParser/utils/parse_btc_csv_to_db.py

32 lines
1.1 KiB
Python
Raw Permalink Normal View History

2025-03-13 15:21:06 +08:00
import pandas as pd
from sqlalchemy import create_engine, text
# Load the dataset
df = pd.read_csv('./data/btcusd_1-min_data.csv')
# Preprocess the data
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')
df.set_index('Timestamp', inplace=True)
# Remove rows with invalid Timestamps
df = df[~df.index.isna()]
# Create a connection to the SQLite database
engine = create_engine('sqlite:///databases/bitcoin_historical_data.db')
2025-03-13 15:21:06 +08:00
# Check if the table already exists and get the last timestamp from the database
with engine.connect() as connection:
query = text("SELECT MAX(Timestamp) FROM bitcoin_data")
last_timestamp = connection.execute(query).fetchone()[0]
# If there is no data in the table, last_timestamp will be None
if last_timestamp is not None:
# Filter the new data to include only rows with a timestamp later than the last timestamp in the database
df = df[df.index > last_timestamp]
# If there are new rows, append them to the database
if not df.empty:
df.to_sql('bitcoin_data', engine, if_exists='append', index=True)
print(f"Added {len(df)} new rows to the database.")
else:
print("No new data to add.")