Enhance backtesting framework with static task processing and progress management. Introduced static task processing for parallel execution, improved error handling, and added a progress manager for better task tracking. Updated BacktestRunner to support progress callbacks and optimized worker allocation based on system resources. Added new configuration files for flexible backtesting setups.

2025-07-10 10:23:41 +08:00
parent be331ed631
commit 65f30a4020
11 changed files with 830 additions and 156 deletions
--- a/cycles/utils/progress_manager.py
+++ b/cycles/utils/progress_manager.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+"""
+Progress Manager for tracking multiple parallel backtest tasks
+"""
+
+import threading
+import time
+import sys
+from typing import Dict, Optional, Callable
+from dataclasses import dataclass
+
+
+@dataclass
+class TaskProgress:
+    """Progress snapshot for one task; timestamps are ``time.time()`` values."""
+    task_id: str
+    name: str
+    current: int
+    total: int
+    start_time: float
+    last_update: float
+
+    @property
+    def percentage(self) -> float:
+        """Completion as a percentage; 0.0 when ``total`` is zero."""
+        if self.total == 0:
+            return 0.0
+        return (self.current / self.total) * 100
+
+    @property
+    def elapsed_time(self) -> float:
+        """Seconds elapsed since ``start_time``."""
+        return time.time() - self.start_time
+
+    @property
+    def eta(self) -> Optional[float]:
+        """Estimated seconds remaining, or None when no estimate is possible."""
+        elapsed = self.elapsed_time
+        remaining = self.total - self.current
+        # No work done, nothing left, or zero elapsed (coarse clock): the
+        # average rate is undefined, so report "unknown" instead of dividing.
+        if self.current <= 0 or remaining <= 0 or elapsed <= 0:
+            return None
+        return remaining * elapsed / self.current
+
+
+class ProgressManager:
+    """
+    Tracks progress of multiple parallel tasks and renders it on the console.
+
+    Task state in ``self.tasks`` is guarded by ``self.lock``; an optional daemon
+    thread redraws one line per task every ``update_interval`` seconds.
+    """
+
+    def __init__(self, update_interval: float = 1.0, display_width: int = 50):
+        """
+        Initialize progress manager
+
+        Args:
+            update_interval: How often to update display (seconds)
+            display_width: Width of progress bar in characters
+        """
+        self.tasks: Dict[str, TaskProgress] = {}
+        self.update_interval = update_interval
+        self.display_width = display_width
+        self.lock = threading.Lock()
+        self.display_thread: Optional[threading.Thread] = None
+        self.running = False
+        # Lines drawn by the last frame; 0 means there is nothing to erase
+        self.last_display_height = 0
+
+    def start_task(self, task_id: str, name: str, total: int) -> None:
+        """
+        Start tracking a new task (an existing task_id is replaced)
+
+        Args:
+            task_id: Unique identifier for the task
+            name: Human-readable name for the task
+            total: Total number of steps in the task
+        """
+        now = time.time()
+        with self.lock:
+            self.tasks[task_id] = TaskProgress(
+                task_id=task_id,
+                name=name,
+                current=0,
+                total=total,
+                start_time=now,
+                last_update=now,
+            )
+
+    def update_progress(self, task_id: str, current: int) -> None:
+        """
+        Update progress for a specific task; unknown task ids are ignored
+
+        Args:
+            task_id: Task identifier
+            current: Current progress value
+        """
+        with self.lock:
+            task = self.tasks.get(task_id)
+            if task is not None:
+                task.current = current
+                task.last_update = time.time()
+
+    def complete_task(self, task_id: str) -> None:
+        """
+        Mark a task as completed; unknown task ids are ignored
+
+        Args:
+            task_id: Task identifier
+        """
+        with self.lock:
+            task = self.tasks.get(task_id)
+            if task is not None:
+                task.current = task.total
+                task.last_update = time.time()
+
+    def start_display(self) -> None:
+        """Start the progress display thread (no-op if already running)"""
+        if not self.running:
+            self.running = True
+            self.display_thread = threading.Thread(target=self._display_loop, daemon=True)
+            self.display_thread.start()
+
+    def stop_display(self) -> None:
+        """Stop the progress display thread and erase the last frame"""
+        self.running = False
+        if self.display_thread:
+            self.display_thread.join(timeout=1.0)
+        # Hold the lock so the erase cannot interleave with a frame that a
+        # still-running display thread (join timed out) is drawing.
+        with self.lock:
+            self._clear_display()
+
+    def _display_loop(self) -> None:
+        """Main loop for updating the progress display"""
+        while self.running:
+            self._update_display()
+            time.sleep(self.update_interval)
+
+    def _update_display(self) -> None:
+        """Update the console display with current progress"""
+        with self.lock:
+            if not self.tasks:
+                return
+
+            # Clear previous display
+            self._clear_display()
+
+            # One line per task, ordered by task_id so rows keep their place
+            lines = [
+                self._format_progress_line(task)
+                for task in sorted(self.tasks.values(), key=lambda t: t.task_id)
+            ]
+
+            # Emit the whole frame with a single write and flush
+            print("\n".join(lines), flush=True)
+
+            self.last_display_height = len(lines)
+
+    def _clear_display(self) -> None:
+        """Erase the previously drawn frame, if any, using ANSI escapes"""
+        if self.last_display_height > 0:
+            # Move cursor up and clear lines
+            for _ in range(self.last_display_height):
+                sys.stdout.write('\033[F')  # Move cursor up one line
+                sys.stdout.write('\033[K')  # Clear line
+            sys.stdout.flush()
+            # The frame is gone: reset the height so a repeated clear (e.g. a
+            # second stop_display call) cannot erase unrelated output above.
+            self.last_display_height = 0
+
+    def _format_progress_line(self, task: TaskProgress) -> str:
+        """
+        Format a single progress line for display
+
+        Returns:
+            Name, bar, percentage, counts, elapsed time and ETA for ``task``
+        """
+        # Progress bar; clamp the fill so current > total (or a negative
+        # current) cannot make the bar longer than display_width
+        filled_width = int(task.percentage / 100 * self.display_width)
+        filled_width = max(0, min(self.display_width, filled_width))
+        bar = '█' * filled_width + '░' * (self.display_width - filled_width)
+
+        # Time information; eta is clock-dependent, so evaluate it only once
+        elapsed_str = self._format_time(task.elapsed_time)
+        eta = task.eta
+        eta_str = self._format_time(eta) if eta is not None else "N/A"
+
+        # Format line
+        return (f"{task.name:<25} │{bar}│ "
+                f"{task.percentage:5.1f}% "
+                f"({task.current:,}/{task.total:,}) "
+                f"⏱ {elapsed_str} ETA: {eta_str}")
+
+    def _format_time(self, seconds: float) -> str:
+        """
+        Format time duration for display
+
+        Args:
+            seconds: Time in seconds
+
+        Returns:
+            "<n>s" below one minute, "<n.n>m" below one hour, else "<n.n>h"
+        """
+        if seconds < 60:
+            return f"{seconds:.0f}s"
+        if seconds < 3600:
+            return f"{seconds / 60:.1f}m"
+        return f"{seconds / 3600:.1f}h"
+
+    def get_task_progress_callback(self, task_id: str) -> Callable[[int], None]:
+        """Return a callback ``f(current)`` that updates progress for task_id"""
+        def callback(current: int) -> None:
+            self.update_progress(task_id, current)
+
+        return callback
+
+    def all_tasks_completed(self) -> bool:
+        """Check if all tasks are completed (True when no tasks are tracked)"""
+        with self.lock:
+            return all(task.current >= task.total for task in self.tasks.values())
+
+    def get_summary(self) -> str:
+        """Get a summary of all tasks"""
+        with self.lock:
+            total_tasks = len(self.tasks)
+            completed_tasks = sum(1 for task in self.tasks.values()
+                                if task.current >= task.total)
+
+            return f"Tasks: {completed_tasks}/{total_tasks} completed"
--- a/cycles/utils/system.py
+++ b/cycles/utils/system.py
@@ -10,10 +10,12 @@ class SystemUtils:
        """Determine optimal number of worker processes based on system resources"""
        cpu_count = os.cpu_count() or 4
        memory_gb = psutil.virtual_memory().total / (1024**3)
-        # Heuristic: Use 75% of cores, but cap based on available memory
-        # Assume each worker needs ~2GB for large datasets
-        workers_by_memory = max(1, int(memory_gb / 2))
-        workers_by_cpu = max(1, int(cpu_count * 0.75))
+        
+        # OPTIMIZATION: More aggressive worker allocation for better performance
+        workers_by_memory = max(1, int(memory_gb / 2))  # 2GB per worker
+        workers_by_cpu = max(1, int(cpu_count * 0.8))  # Use 80% of CPU cores
+        optimal_workers = min(workers_by_cpu, workers_by_memory, 8)  # Cap at 8 workers
+        
        if self.logging is not None:
-            self.logging.info(f"Using {min(workers_by_cpu, workers_by_memory)} workers for processing")
-        return min(workers_by_cpu, workers_by_memory)
+            self.logging.info(f"Using {optimal_workers} workers for processing (CPU-based: {workers_by_cpu}, Memory-based: {workers_by_memory})")
+        return optimal_workers