Add daily model training scripts and terminal UI for live trading
- Introduced `train_daily.sh` for automating daily model retraining, including data download and model training steps.
- Added `install_cron.sh` for setting up a cron job to run the daily training script.
- Created `setup_schedule.sh` for configuring systemd timers for daily training tasks.
- Implemented a terminal UI using Rich for real-time monitoring of trading performance, including metrics display and log handling.
- Updated `pyproject.toml` to include the `rich` dependency for UI functionality.
- Enhanced `.gitignore` to exclude model and log files.
- Added database support for trade persistence and metrics calculation.
- Updated README with installation and usage instructions for the new features.
This commit is contained in:
@@ -3,7 +3,16 @@ Regime Detection Research Script with Walk-Forward Training.
|
||||
|
||||
Tests multiple holding horizons to find optimal parameters
|
||||
without look-ahead bias.
|
||||
|
||||
Usage:
|
||||
uv run python research/regime_detection.py [options]
|
||||
|
||||
Options:
|
||||
--days DAYS Number of days of data (default: 90)
|
||||
--start DATE Start date (YYYY-MM-DD), overrides --days
|
||||
--end DATE End date (YYYY-MM-DD), defaults to now
|
||||
"""
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
@@ -25,18 +34,36 @@ TRAIN_RATIO = 0.7 # 70% train, 30% test
|
||||
PROFIT_THRESHOLD = 0.005 # 0.5% profit target
|
||||
Z_WINDOW = 24
|
||||
FEE_RATE = 0.001 # 0.1% round-trip fee
|
||||
DEFAULT_DAYS = 90 # Default lookback period in days
|
||||
|
||||
|
||||
def load_data():
|
||||
"""Load and align BTC/ETH data."""
|
||||
def load_data(days: int = DEFAULT_DAYS, start_date: str = None, end_date: str = None):
|
||||
"""
|
||||
Load and align BTC/ETH data.
|
||||
|
||||
Args:
|
||||
days: Number of days of historical data (default: 90)
|
||||
start_date: Optional start date (YYYY-MM-DD), overrides days
|
||||
end_date: Optional end date (YYYY-MM-DD), defaults to now
|
||||
|
||||
Returns:
|
||||
Tuple of (df_btc, df_eth) DataFrames
|
||||
"""
|
||||
dm = DataManager()
|
||||
|
||||
df_btc = dm.load_data("okx", "BTC-USDT", "1h", MarketType.SPOT)
|
||||
df_eth = dm.load_data("okx", "ETH-USDT", "1h", MarketType.SPOT)
|
||||
|
||||
# Filter to Oct-Dec 2025
|
||||
start = pd.Timestamp("2025-10-01", tz="UTC")
|
||||
end = pd.Timestamp("2025-12-31", tz="UTC")
|
||||
# Determine date range
|
||||
if end_date:
|
||||
end = pd.Timestamp(end_date, tz="UTC")
|
||||
else:
|
||||
end = pd.Timestamp.now(tz="UTC")
|
||||
|
||||
if start_date:
|
||||
start = pd.Timestamp(start_date, tz="UTC")
|
||||
else:
|
||||
start = end - pd.Timedelta(days=days)
|
||||
|
||||
df_btc = df_btc[(df_btc.index >= start) & (df_btc.index <= end)]
|
||||
df_eth = df_eth[(df_eth.index >= start) & (df_eth.index <= end)]
|
||||
@@ -46,7 +73,7 @@ def load_data():
|
||||
df_btc = df_btc.loc[common]
|
||||
df_eth = df_eth.loc[common]
|
||||
|
||||
logger.info(f"Loaded {len(common)} aligned hourly bars")
|
||||
logger.info(f"Loaded {len(common)} aligned hourly bars from {start} to {end}")
|
||||
return df_btc, df_eth
|
||||
|
||||
|
||||
@@ -168,7 +195,10 @@ def calculate_mae(features, predictions, test_idx, horizon):
|
||||
|
||||
|
||||
def calculate_net_profit(features, predictions, test_idx, horizon):
|
||||
"""Calculate estimated net profit including fees."""
|
||||
"""
|
||||
Calculate estimated net profit including fees.
|
||||
Enforces 'one trade at a time' to avoid inflating returns with overlapping signals.
|
||||
"""
|
||||
test_features = features.loc[test_idx]
|
||||
spread = test_features['spread']
|
||||
z_score = test_features['z_score']
|
||||
@@ -176,7 +206,14 @@ def calculate_net_profit(features, predictions, test_idx, horizon):
|
||||
total_pnl = 0.0
|
||||
n_trades = 0
|
||||
|
||||
# Track when we are free to trade again
|
||||
next_trade_idx = 0
|
||||
|
||||
for i, (idx, pred) in enumerate(zip(test_idx, predictions)):
|
||||
# Skip if we are still in a trade
|
||||
if i < next_trade_idx:
|
||||
continue
|
||||
|
||||
if pred != 1:
|
||||
continue
|
||||
|
||||
@@ -203,6 +240,10 @@ def calculate_net_profit(features, predictions, test_idx, horizon):
|
||||
net_pnl = pnl - FEE_RATE
|
||||
total_pnl += net_pnl
|
||||
n_trades += 1
|
||||
|
||||
# Set next available trade index (simple non-overlapping logic)
|
||||
# We assume we hold for 'horizon' bars
|
||||
next_trade_idx = i + horizon
|
||||
|
||||
return total_pnl, n_trades
|
||||
|
||||
@@ -295,10 +336,54 @@ def test_horizons(features, horizons):
|
||||
return results
|
||||
|
||||
|
||||
def parse_args():
    """
    Build the command-line parser for the horizon research run and parse it.

    Recognised options:
        --days            integer lookback in days (defaults to DEFAULT_DAYS)
        --start           start date string (YYYY-MM-DD), overrides --days
        --end             end date string (YYYY-MM-DD), defaults to now
        --output          path of the results CSV
        --output-horizon  optional path for writing the best horizon

    Returns:
        The argparse.Namespace produced by parsing the process arguments.
    """
    cli = argparse.ArgumentParser(
        description="Regime detection research - test multiple horizons"
    )

    # Kept as an ordered table so the --help listing matches declaration order.
    option_table = (
        (
            "--days",
            {
                "type": int,
                "default": DEFAULT_DAYS,
                "help": f"Number of days of data (default: {DEFAULT_DAYS})",
            },
        ),
        (
            "--start",
            {
                "type": str,
                "default": None,
                "help": "Start date (YYYY-MM-DD), overrides --days",
            },
        ),
        (
            "--end",
            {
                "type": str,
                "default": None,
                "help": "End date (YYYY-MM-DD), defaults to now",
            },
        ),
        (
            "--output",
            {
                "type": str,
                "default": "research/horizon_optimization_results.csv",
                "help": "Output CSV path",
            },
        ),
        (
            "--output-horizon",
            {
                "type": str,
                "default": None,
                "help": "Path to save the best horizon (integer) to a file",
            },
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
"""Main research function."""
|
||||
# Load data
|
||||
df_btc, df_eth = load_data()
|
||||
args = parse_args()
|
||||
|
||||
# Load data with dynamic date range
|
||||
df_btc, df_eth = load_data(
|
||||
days=args.days,
|
||||
start_date=args.start,
|
||||
end_date=args.end
|
||||
)
|
||||
cq_df = load_cryptoquant_data()
|
||||
|
||||
# Calculate features
|
||||
@@ -312,7 +397,7 @@ def main():
|
||||
|
||||
if not results:
|
||||
print("No valid results!")
|
||||
return
|
||||
return None
|
||||
|
||||
# Find best by different metrics
|
||||
results_df = pd.DataFrame(results)
|
||||
@@ -331,9 +416,15 @@ def main():
|
||||
print(f"Lowest MAE: {lowest_mae['horizon']:.0f}h (MAE={lowest_mae['avg_mae']:.2f}%)")
|
||||
|
||||
# Save results
|
||||
output_path = "research/horizon_optimization_results.csv"
|
||||
results_df.to_csv(output_path, index=False)
|
||||
print(f"\nResults saved to {output_path}")
|
||||
results_df.to_csv(args.output, index=False)
|
||||
print(f"\nResults saved to {args.output}")
|
||||
|
||||
# Save best horizon if requested
|
||||
if args.output_horizon:
|
||||
best_h = int(best_pnl['horizon'])
|
||||
with open(args.output_horizon, 'w') as f:
|
||||
f.write(str(best_h))
|
||||
print(f"Best horizon {best_h}h saved to {args.output_horizon}")
|
||||
|
||||
return results_df
|
||||
|
||||
|
||||
Reference in New Issue
Block a user