Enhance backtesting framework with static task processing and progress management. Introduced static task processing for parallel execution, improved error handling, and added a progress manager for better task tracking. Updated BacktestRunner to support progress callbacks and optimized worker allocation based on system resources. Added new configuration files for flexible backtesting setups.

This commit is contained in:
Simon Moisy
2025-07-10 10:23:41 +08:00
parent be331ed631
commit 65f30a4020
11 changed files with 830 additions and 156 deletions

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
Progress Manager for tracking multiple parallel backtest tasks
"""
import threading
import time
import sys
from typing import Dict, Optional, Callable
from dataclasses import dataclass
@dataclass
class TaskProgress:
"""Represents progress information for a single task"""
task_id: str
name: str
current: int
total: int
start_time: float
last_update: float
@property
def percentage(self) -> float:
"""Calculate completion percentage"""
if self.total == 0:
return 0.0
return (self.current / self.total) * 100
@property
def elapsed_time(self) -> float:
"""Calculate elapsed time in seconds"""
return time.time() - self.start_time
@property
def eta(self) -> Optional[float]:
"""Estimate time to completion in seconds"""
if self.current == 0 or self.percentage >= 100:
return None
elapsed = self.elapsed_time
rate = self.current / elapsed
remaining = self.total - self.current
return remaining / rate if rate > 0 else None
class ProgressManager:
"""Manages progress tracking for multiple parallel tasks"""
def __init__(self, update_interval: float = 1.0, display_width: int = 50):
"""
Initialize progress manager
Args:
update_interval: How often to update display (seconds)
display_width: Width of progress bar in characters
"""
self.tasks: Dict[str, TaskProgress] = {}
self.update_interval = update_interval
self.display_width = display_width
self.lock = threading.Lock()
self.display_thread: Optional[threading.Thread] = None
self.running = False
self.last_display_height = 0
def start_task(self, task_id: str, name: str, total: int) -> None:
"""
Start tracking a new task
Args:
task_id: Unique identifier for the task
name: Human-readable name for the task
total: Total number of steps in the task
"""
with self.lock:
self.tasks[task_id] = TaskProgress(
task_id=task_id,
name=name,
current=0,
total=total,
start_time=time.time(),
last_update=time.time()
)
def update_progress(self, task_id: str, current: int) -> None:
"""
Update progress for a specific task
Args:
task_id: Task identifier
current: Current progress value
"""
with self.lock:
if task_id in self.tasks:
self.tasks[task_id].current = current
self.tasks[task_id].last_update = time.time()
def complete_task(self, task_id: str) -> None:
"""
Mark a task as completed
Args:
task_id: Task identifier
"""
with self.lock:
if task_id in self.tasks:
task = self.tasks[task_id]
task.current = task.total
task.last_update = time.time()
def start_display(self) -> None:
"""Start the progress display thread"""
if not self.running:
self.running = True
self.display_thread = threading.Thread(target=self._display_loop, daemon=True)
self.display_thread.start()
def stop_display(self) -> None:
"""Stop the progress display thread"""
self.running = False
if self.display_thread:
self.display_thread.join(timeout=1.0)
self._clear_display()
def _display_loop(self) -> None:
"""Main loop for updating the progress display"""
while self.running:
self._update_display()
time.sleep(self.update_interval)
def _update_display(self) -> None:
"""Update the console display with current progress"""
with self.lock:
if not self.tasks:
return
# Clear previous display
self._clear_display()
# Build display lines
lines = []
for task in sorted(self.tasks.values(), key=lambda t: t.task_id):
line = self._format_progress_line(task)
lines.append(line)
# Print all lines
for line in lines:
print(line, flush=True)
self.last_display_height = len(lines)
def _clear_display(self) -> None:
"""Clear the previous progress display"""
if self.last_display_height > 0:
# Move cursor up and clear lines
for _ in range(self.last_display_height):
sys.stdout.write('\033[F') # Move cursor up one line
sys.stdout.write('\033[K') # Clear line
sys.stdout.flush()
def _format_progress_line(self, task: TaskProgress) -> str:
"""
Format a single progress line for display
Args:
task: TaskProgress instance
Returns:
Formatted progress string
"""
# Progress bar
filled_width = int(task.percentage / 100 * self.display_width)
bar = '' * filled_width + '' * (self.display_width - filled_width)
# Time information
elapsed_str = self._format_time(task.elapsed_time)
eta_str = self._format_time(task.eta) if task.eta else "N/A"
# Format line
line = (f"{task.name:<25}{bar}"
f"{task.percentage:5.1f}% "
f"({task.current:,}/{task.total:,}) "
f"{elapsed_str} ETA: {eta_str}")
return line
def _format_time(self, seconds: float) -> str:
"""
Format time duration for display
Args:
seconds: Time in seconds
Returns:
Formatted time string
"""
if seconds < 60:
return f"{seconds:.0f}s"
elif seconds < 3600:
minutes = seconds / 60
return f"{minutes:.1f}m"
else:
hours = seconds / 3600
return f"{hours:.1f}h"
def get_task_progress_callback(self, task_id: str) -> Callable[[int], None]:
"""
Get a progress callback function for a specific task
Args:
task_id: Task identifier
Returns:
Callback function that updates progress for this task
"""
def callback(current: int) -> None:
self.update_progress(task_id, current)
return callback
def all_tasks_completed(self) -> bool:
"""Check if all tasks are completed"""
with self.lock:
return all(task.current >= task.total for task in self.tasks.values())
def get_summary(self) -> str:
"""Get a summary of all tasks"""
with self.lock:
total_tasks = len(self.tasks)
completed_tasks = sum(1 for task in self.tasks.values()
if task.current >= task.total)
return f"Tasks: {completed_tasks}/{total_tasks} completed"

View File

@@ -10,10 +10,12 @@ class SystemUtils:
"""Determine optimal number of worker processes based on system resources"""
cpu_count = os.cpu_count() or 4
memory_gb = psutil.virtual_memory().total / (1024**3)
# Heuristic: Use 75% of cores, but cap based on available memory
# Assume each worker needs ~2GB for large datasets
workers_by_memory = max(1, int(memory_gb / 2))
workers_by_cpu = max(1, int(cpu_count * 0.75))
# OPTIMIZATION: More aggressive worker allocation for better performance
workers_by_memory = max(1, int(memory_gb / 2)) # 2GB per worker
workers_by_cpu = max(1, int(cpu_count * 0.8)) # Use 80% of CPU cores
optimal_workers = min(workers_by_cpu, workers_by_memory, 8) # Cap at 8 workers
if self.logging is not None:
self.logging.info(f"Using {min(workers_by_cpu, workers_by_memory)} workers for processing")
return min(workers_by_cpu, workers_by_memory)
self.logging.info(f"Using {optimal_workers} workers for processing (CPU-based: {workers_by_cpu}, Memory-based: {workers_by_memory})")
return optimal_workers