import pandas as pd from sqlalchemy import create_engine, text # Load the dataset df = pd.read_csv('./data/btcusd_1-min_data.csv') # Preprocess the data df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s') df.set_index('Timestamp', inplace=True) # Remove rows with invalid Timestamps df = df[~df.index.isna()] # Create a connection to the SQLite database engine = create_engine('sqlite:///databases/bitcoin_historical_data.db') # Check if the table already exists and get the last timestamp from the database with engine.connect() as connection: query = text("SELECT MAX(Timestamp) FROM bitcoin_data") last_timestamp = connection.execute(query).fetchone()[0] # If there is no data in the table, last_timestamp will be None if last_timestamp is not None: # Filter the new data to include only rows with a timestamp later than the last timestamp in the database df = df[df.index > last_timestamp] # If there are new rows, append them to the database if not df.empty: df.to_sql('bitcoin_data', engine, if_exists='append', index=True) print(f"Added {len(df)} new rows to the database.") else: print("No new data to add.")