Add daily model training scripts and terminal UI for live trading

- Introduced `train_daily.sh` for automating daily model retraining, including data download and model training steps.
- Added `install_cron.sh` for setting up a cron job to run the daily training script.
- Created `setup_schedule.sh` for configuring systemd timers for daily training tasks.
- Implemented a terminal UI using Rich for real-time monitoring of trading performance, including metrics display and log handling.
- Updated `pyproject.toml` to include the `rich` dependency for UI functionality.
- Enhanced `.gitignore` to exclude model and log files.
- Added database support for trade persistence and metrics calculation.
- Updated README with installation and usage instructions for the new features.
This commit is contained in:
2026-01-18 11:08:57 +08:00
parent 35992ee374
commit b5550f4ff4
27 changed files with 3582 additions and 113 deletions

View File

@@ -3,7 +3,16 @@ Regime Detection Research Script with Walk-Forward Training.
Tests multiple holding horizons to find optimal parameters
without look-ahead bias.
Usage:
uv run python research/regime_detection.py [options]
Options:
--days DAYS Number of days of data (default: 90)
--start DATE Start date (YYYY-MM-DD), overrides --days
--end DATE End date (YYYY-MM-DD), defaults to now
"""
import argparse
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -25,18 +34,36 @@ TRAIN_RATIO = 0.7 # 70% train, 30% test
PROFIT_THRESHOLD = 0.005 # 0.5% profit target
Z_WINDOW = 24
FEE_RATE = 0.001 # 0.1% round-trip fee
DEFAULT_DAYS = 90 # Default lookback period in days
def load_data():
"""Load and align BTC/ETH data."""
def load_data(days: int = DEFAULT_DAYS, start_date: str = None, end_date: str = None):
"""
Load and align BTC/ETH data.
Args:
days: Number of days of historical data (default: 90)
start_date: Optional start date (YYYY-MM-DD), overrides days
end_date: Optional end date (YYYY-MM-DD), defaults to now
Returns:
Tuple of (df_btc, df_eth) DataFrames
"""
dm = DataManager()
df_btc = dm.load_data("okx", "BTC-USDT", "1h", MarketType.SPOT)
df_eth = dm.load_data("okx", "ETH-USDT", "1h", MarketType.SPOT)
# Filter to Oct-Dec 2025
start = pd.Timestamp("2025-10-01", tz="UTC")
end = pd.Timestamp("2025-12-31", tz="UTC")
# Determine date range
if end_date:
end = pd.Timestamp(end_date, tz="UTC")
else:
end = pd.Timestamp.now(tz="UTC")
if start_date:
start = pd.Timestamp(start_date, tz="UTC")
else:
start = end - pd.Timedelta(days=days)
df_btc = df_btc[(df_btc.index >= start) & (df_btc.index <= end)]
df_eth = df_eth[(df_eth.index >= start) & (df_eth.index <= end)]
@@ -46,7 +73,7 @@ def load_data():
df_btc = df_btc.loc[common]
df_eth = df_eth.loc[common]
logger.info(f"Loaded {len(common)} aligned hourly bars")
logger.info(f"Loaded {len(common)} aligned hourly bars from {start} to {end}")
return df_btc, df_eth
@@ -168,7 +195,10 @@ def calculate_mae(features, predictions, test_idx, horizon):
def calculate_net_profit(features, predictions, test_idx, horizon):
"""Calculate estimated net profit including fees."""
"""
Calculate estimated net profit including fees.
Enforces 'one trade at a time' to avoid inflating returns with overlapping signals.
"""
test_features = features.loc[test_idx]
spread = test_features['spread']
z_score = test_features['z_score']
@@ -176,7 +206,14 @@ def calculate_net_profit(features, predictions, test_idx, horizon):
total_pnl = 0.0
n_trades = 0
# Track when we are free to trade again
next_trade_idx = 0
for i, (idx, pred) in enumerate(zip(test_idx, predictions)):
# Skip if we are still in a trade
if i < next_trade_idx:
continue
if pred != 1:
continue
@@ -203,6 +240,10 @@ def calculate_net_profit(features, predictions, test_idx, horizon):
net_pnl = pnl - FEE_RATE
total_pnl += net_pnl
n_trades += 1
# Set next available trade index (simple non-overlapping logic)
# We assume we hold for 'horizon' bars
next_trade_idx = i + horizon
return total_pnl, n_trades
@@ -295,10 +336,54 @@ def test_horizons(features, horizons):
return results
def _positive_int(value):
    """Argparse ``type`` callable: parse *value* as an int and require it to be > 0."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number <= 0:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {number}"
        )
    return number


def parse_args(argv=None):
    """
    Parse command line arguments.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``, so existing
            ``parse_args()`` callers behave as before.

    Returns:
        argparse.Namespace with attributes ``days``, ``start``, ``end``,
        ``output`` and ``output_horizon``.
    """
    parser = argparse.ArgumentParser(
        description="Regime detection research - test multiple horizons"
    )
    parser.add_argument(
        "--days",
        # A zero or negative lookback would produce an empty/inverted date
        # window downstream, so reject it at the CLI boundary.
        type=_positive_int,
        default=DEFAULT_DAYS,
        help=f"Number of days of data (default: {DEFAULT_DAYS})",
    )
    parser.add_argument(
        "--start",
        type=str,
        default=None,
        help="Start date (YYYY-MM-DD), overrides --days",
    )
    parser.add_argument(
        "--end",
        type=str,
        default=None,
        help="End date (YYYY-MM-DD), defaults to now",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="research/horizon_optimization_results.csv",
        help="Output CSV path",
    )
    parser.add_argument(
        "--output-horizon",
        type=str,
        default=None,
        help="Path to save the best horizon (integer) to a file",
    )
    return parser.parse_args(argv)
def main():
"""Main research function."""
# Load data
df_btc, df_eth = load_data()
args = parse_args()
# Load data with dynamic date range
df_btc, df_eth = load_data(
days=args.days,
start_date=args.start,
end_date=args.end
)
cq_df = load_cryptoquant_data()
# Calculate features
@@ -312,7 +397,7 @@ def main():
if not results:
print("No valid results!")
return
return None
# Find best by different metrics
results_df = pd.DataFrame(results)
@@ -331,9 +416,15 @@ def main():
print(f"Lowest MAE: {lowest_mae['horizon']:.0f}h (MAE={lowest_mae['avg_mae']:.2f}%)")
# Save results
output_path = "research/horizon_optimization_results.csv"
results_df.to_csv(output_path, index=False)
print(f"\nResults saved to {output_path}")
results_df.to_csv(args.output, index=False)
print(f"\nResults saved to {args.output}")
# Save best horizon if requested
if args.output_horizon:
best_h = int(best_pnl['horizon'])
with open(args.output_horizon, 'w') as f:
f.write(str(best_h))
print(f"Best horizon {best_h}h saved to {args.output_horizon}")
return results_df