import os
import json
import csv
from collections import defaultdict

import pandas as pd

RESULTS_DIR = "results"
DATA_DIR = "data"


class Storage:
    """Storage class for storing and loading results and data"""

    def __init__(self, logging=None, results_dir=RESULTS_DIR, data_dir=DATA_DIR):
        self.results_dir = results_dir
        self.data_dir = data_dir
        self.logging = logging
        # Create directories if they don't exist
        os.makedirs(self.results_dir, exist_ok=True)
        os.makedirs(self.data_dir, exist_ok=True)

    def load_data(self, file_path, start_date, stop_date):
        """Load data with optimized dtypes and filtering, supporting CSV and JSON input

        Args:
            file_path: path to the data file
            start_date: start date
            stop_date: stop date

        Returns:
            pandas DataFrame
        """
        # Determine file type
        _, ext = os.path.splitext(file_path)
        ext = ext.lower()
        try:
            if ext == ".json":
                with open(os.path.join(self.data_dir, file_path), 'r') as f:
                    raw = json.load(f)
                data = pd.DataFrame(raw["Data"])
                # Convert columns to lowercase
                data.columns = data.columns.str.lower()
                # Convert timestamp to datetime
                data["timestamp"] = pd.to_datetime(data["timestamp"], unit="s")
                # Filter by date range
                data = data[(data["timestamp"] >= start_date) & (data["timestamp"] <= stop_date)]
                if self.logging is not None:
                    self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
                return data.set_index("timestamp")
            else:
                # Define optimized dtypes
                dtypes = {
                    'Open': 'float32',
                    'High': 'float32',
                    'Low': 'float32',
                    'Close': 'float32',
                    'Volume': 'float32',
                }
                # Read data with original capitalized column names
                data = pd.read_csv(os.path.join(self.data_dir, file_path), dtype=dtypes)
                # Convert timestamp to datetime
                if 'Timestamp' in data.columns:
                    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
                    # Filter by date range
                    data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
                    # Now convert column names to lowercase
                    data.columns = data.columns.str.lower()
                    if self.logging is not None:
                        self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
                    return data.set_index('timestamp')
                else:
                    # Attempt to use the first column if 'Timestamp' is not present
                    data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
                    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
                    data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= stop_date)]
                    data.columns = data.columns.str.lower()  # Ensure all other columns are lowercase
                    if self.logging is not None:
                        self.logging.info(f"Data loaded from {file_path} (using first column as timestamp) for date range {start_date} to {stop_date}")
                    return data.set_index('timestamp')
        except Exception as e:
            if self.logging is not None:
                self.logging.error(f"Error loading data from {file_path}: {e}")
            # Return an empty DataFrame with a DatetimeIndex
            return pd.DataFrame(index=pd.to_datetime([]))

    def save_data(self, data: pd.DataFrame, file_path: str):
        """Save processed data to a CSV file.

        If the DataFrame has a DatetimeIndex, it's converted to float Unix timestamps
        (seconds since epoch) before saving. The index is saved as a column named 'timestamp'.

        Args:
            data (pd.DataFrame): data to save.
            file_path (str): path to the data file relative to the data_dir.
        """
        data_to_save = data.copy()
        if isinstance(data_to_save.index, pd.DatetimeIndex):
            # Convert DatetimeIndex to Unix timestamp (float seconds since epoch)
            # and make it a column named 'timestamp'.
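            # Note: DatetimeIndex.astype('int64') yields nanoseconds since the epoch,
            # so dividing by 1e9 below converts to float seconds, matching the
            # unit="s" used by pd.to_datetime in load_data.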
            data_to_save['timestamp'] = data_to_save.index.astype('int64') / 1e9
            # Reset the index so the 'timestamp' column is saved and the old DatetimeIndex
            # is not written out as a column. We want 'timestamp' to be the first column.
            data_to_save.reset_index(drop=True, inplace=True)
            # Ensure 'timestamp' is the first column if other columns exist
            if 'timestamp' in data_to_save.columns and len(data_to_save.columns) > 1:
                cols = ['timestamp'] + [col for col in data_to_save.columns if col != 'timestamp']
                data_to_save = data_to_save[cols]
        elif pd.api.types.is_numeric_dtype(data_to_save.index.dtype):
            # If the index is already numeric (e.g. float Unix timestamps from a previous
            # save/load cycle), make it a column named 'timestamp'.
            data_to_save['timestamp'] = data_to_save.index
            data_to_save.reset_index(drop=True, inplace=True)
            if 'timestamp' in data_to_save.columns and len(data_to_save.columns) > 1:
                cols = ['timestamp'] + [col for col in data_to_save.columns if col != 'timestamp']
                data_to_save = data_to_save[cols]
        else:
            # For other index types, save with the current index; to_csv will handle it.
            # This branch could be removed if we strictly expect either a DatetimeIndex
            # or a numeric index from a previous save.
            pass  # data_to_save remains as is; to_csv would write its index if index=True

        # Save to CSV, writing the 'timestamp' column (if created) rather than the DataFrame's active index.
        full_path = os.path.join(self.data_dir, file_path)
        data_to_save.to_csv(full_path, index=False)  # index=False because timestamp is now a column
        if self.logging is not None:
            self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")

    def format_row(self, row):
        """Format a row for a combined results CSV file

        Args:
            row: row to format

        Returns:
            formatted row
        """
        return {
            "timeframe": row["timeframe"],
            "stop_loss_pct": f"{row['stop_loss_pct'] * 100:.2f}%",
            "n_trades": row["n_trades"],
            "n_stop_loss": row["n_stop_loss"],
            "win_rate": f"{row['win_rate'] * 100:.2f}%",
            "max_drawdown": f"{row['max_drawdown'] * 100:.2f}%",
            "avg_trade": f"{row['avg_trade'] * 100:.2f}%",
            "profit_ratio": f"{row['profit_ratio'] * 100:.2f}%",
            "final_usd": f"{row['final_usd']:.2f}",
            "total_fees_usd": f"{row['total_fees_usd']:.2f}",
        }

    def write_results_chunk(self, filename, fieldnames, rows, write_header=False, initial_usd=None):
        """Write a chunk of results to a CSV file

        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of rows
            write_header: whether to write the header
            initial_usd: initial USD
        """
        mode = 'w' if write_header else 'a'
        with open(filename, mode, newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            if write_header:
                csvfile.write(f"# initial_usd: {initial_usd}\n")
                writer.writeheader()
            for row in rows:
                # Only keep keys that are in fieldnames
                filtered_row = {k: v for k, v in row.items() if k in fieldnames}
                writer.writerow(filtered_row)

    def write_backtest_results(self, filename, fieldnames, rows, metadata_lines=None):
        """Write combined results to a tab-delimited CSV file

        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of rows
            metadata_lines: optional list of strings to write as header comments
        """
        fname = os.path.join(self.results_dir, filename)
        with open(fname, "w", newline="") as csvfile:
            if metadata_lines:
                for line in metadata_lines:
                    csvfile.write(f"{line}\n")
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
            writer.writeheader()
            for row in rows:
                writer.writerow(self.format_row(row))
        if self.logging is not None:
            self.logging.info(f"Combined results written to {fname}")

    def write_trades(self, all_trade_rows, trades_fieldnames):
        """Write trades to per-combination CSV files, grouped by timeframe and stop-loss

        Args:
            all_trade_rows: list of trade rows
            trades_fieldnames: list of trade fieldnames
        """
        trades_by_combo = defaultdict(list)
        for trade in all_trade_rows:
            tf = trade.get("timeframe")
            sl = trade.get("stop_loss_pct")
            trades_by_combo[(tf, sl)].append(trade)
        for (tf, sl), trades in trades_by_combo.items():
            sl_percent = int(round(sl * 100))
            trades_filename = os.path.join(self.results_dir, f"trades_{tf}_ST{sl_percent}pct.csv")
            with open(trades_filename, "w", newline="") as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=trades_fieldnames)
                writer.writeheader()
                for trade in trades:
                    writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})
            if self.logging is not None:
                self.logging.info(f"Trades written to {trades_filename}")
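

# Example usage: a minimal sketch of the intended call flow, not part of the class
# itself. The file name, date range, and output name below are illustrative
# assumptions and would depend on the files actually present under data/.
if __name__ == "__main__":
    import logging

    logging.basicConfig(level=logging.INFO)
    storage = Storage(logging=logging)

    # Load a hypothetical OHLCV file from data/, filter it to 2023, and save the
    # filtered frame back out with a Unix-timestamp column.
    df = storage.load_data("BTCUSD_1h.csv", "2023-01-01", "2023-12-31")
    if not df.empty:
        storage.save_data(df, "BTCUSD_1h_2023.csv")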