This commit is contained in:
Simon Moisy
2025-03-13 15:21:06 +08:00
parent ed67968cc2
commit 302be95ce7
13 changed files with 690 additions and 0 deletions

15
old/datasets.py Normal file
View File

@@ -0,0 +1,15 @@
import os
import subprocess
class Datasets:
    """Helper utilities for fetching external datasets."""

    @staticmethod
    def download_kaggle_dataset(dataset_id, download_path):
        """Download a Kaggle dataset into *download_path* via the `kaggle` CLI.

        Expects Kaggle API credentials to be present under ~/.kaggle.
        Prints a status message on success or failure; returns None.
        """
        # Point the Kaggle CLI at the user's credential directory.
        os.environ["KAGGLE_CONFIG_DIR"] = os.path.expanduser("~/.kaggle")
        cli_args = [
            "kaggle", "datasets", "download",
            "-d", dataset_id,
            "-p", download_path,
        ]
        try:
            subprocess.run(cli_args, check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error downloading dataset: {e}")
        else:
            print(f"Dataset downloaded successfully to {download_path}")

5
old/download_dataset.py Normal file
View File

@@ -0,0 +1,5 @@
from datasets import Datasets

# Kaggle identifier of the 1-minute BTC/USD price-history dataset.
dataset_id = "mczielinski/bitcoin-historical-data"
# Local directory the dataset archive is downloaded into.
download_path = "./data"

if __name__ == "__main__":
    # Guarded so merely importing this module does not trigger a download.
    Datasets.download_kaggle_dataset(dataset_id, download_path)

36
old/drop_data.py Normal file
View File

@@ -0,0 +1,36 @@
import sqlite3
from contextlib import closing
from datetime import datetime

# Path of the SQLite database holding the price history.
db_path = 'bitcoin_historical_data.db'

# Rows with a Timestamp strictly after this date are deleted.
date_threshold = datetime(2025, 1, 15)
# Render the threshold in the text format the table stores
# (YYYY-MM-DD HH:MM:SS.SSS), pinned to midnight of that day.
date_threshold_str = date_threshold.strftime('%Y-%m-%d 00:00:00.000')

# Parameterized DELETE: the threshold is bound, not string-interpolated.
query = """
DELETE FROM bitcoin_data
WHERE Timestamp > ?
"""

# closing() guarantees the connection is released even if the DELETE
# raises — the original leaked the handle on any error before close().
with closing(sqlite3.connect(db_path)) as connection:
    cursor = connection.cursor()
    cursor.execute(query, (date_threshold_str,))
    # Capture the affected-row count before issuing further statements.
    deleted_rows = cursor.rowcount
    connection.commit()

print(f"Deleted {deleted_rows} rows with Timestamp greater than January 15th, 2025")

View File

@@ -0,0 +1,21 @@
from BitcoinPricePredictor import BitcoinPricePredictor
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix
if __name__ == "__main__":
    # Restore the previously trained Keras model from disk.
    model = load_model('models/model_2025-01-21_04-49-43.h5')
    # Predictor wraps the model plus the historical-price SQLite database.
    predictor = BitcoinPricePredictor(model=model, db_path='bitcoin_historical_data.db')
    # Rows present in the DB that the model has not been evaluated on yet.
    missing_data = predictor.load_new_data_from_model()
    print(f"missing data {len(missing_data)}")
    if not missing_data.empty:
        predictions, reality = predictor.make_predictions_w_reality(missing_data)
        print(f"predictions {len(predictions)}")
        # NOTE(review): predictions[1:] drops the first prediction, presumably to
        # align each prediction with the following step's observed reality —
        # confirm against make_predictions_w_reality's return contract, since an
        # off-by-one here would silently skew the confusion matrix.
        cm = confusion_matrix(reality, predictions[1:])
        print("Confusion Matrix:")
        print(cm)
    else:
        # Nothing new in the database since the last evaluation.
        print("No new data found.")

View File

@@ -0,0 +1,32 @@
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.exc import OperationalError

# Load the raw 1-minute BTC/USD candles exported from Kaggle.
df = pd.read_csv('./data/btcusd_1-min_data.csv')

# The CSV stores Timestamp as Unix epoch seconds; parse and index by it.
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')
df.set_index('Timestamp', inplace=True)
# Drop rows whose Timestamp failed to parse.
df = df[~df.index.isna()]

engine = create_engine('sqlite:///bitcoin_historical_data.db')

# Find the newest row already stored. On the very first run the table does
# not exist yet, and the MAX() query raises OperationalError — the original
# script crashed here despite its comment claiming to "check if the table
# already exists".
last_timestamp = None
try:
    with engine.connect() as connection:
        query = text("SELECT MAX(Timestamp) FROM bitcoin_data")
        last_timestamp = connection.execute(query).fetchone()[0]
except OperationalError:
    # Table absent: treat the database as empty and insert everything.
    pass

if last_timestamp is not None:
    # Keep only rows strictly newer than what the database already holds.
    df = df[df.index > last_timestamp]

if not df.empty:
    df.to_sql('bitcoin_data', engine, if_exists='append', index=True)
    print(f"Added {len(df)} new rows to the database.")
else:
    print("No new data to add.")