import os

try:
    import psutil
except ImportError:  # third-party; fall back to the stdlib when it is absent
    psutil = None

_BYTES_PER_GB = 1024**3


class SystemUtils:
    """Helpers for sizing work to the resources of the host machine."""

    def __init__(self, logging=None):
        """Store an optional logger.

        Args:
            logging: Object exposing ``info(message)``, or ``None`` to
                disable logging.
        """
        self.logging = logging

    @staticmethod
    def _total_memory_gb():
        """Return total physical memory in GiB, or ``None`` if unknown.

        Uses ``psutil`` when it is importable; otherwise falls back to
        ``os.sysconf``, which is not available on every platform.
        """
        if psutil is not None:
            return psutil.virtual_memory().total / _BYTES_PER_GB
        try:
            page_size = os.sysconf("SC_PAGE_SIZE")
            page_count = os.sysconf("SC_PHYS_PAGES")
        except (AttributeError, ValueError, OSError):
            # No sysconf on this platform, or the names are not supported.
            return None
        # sysconf reports -1 for values it cannot determine.
        if page_size <= 0 or page_count <= 0:
            return None
        return page_size * page_count / _BYTES_PER_GB

    def get_optimal_workers(self, *, memory_per_worker_gb=2.0, cpu_fraction=0.75):
        """Determine the number of worker processes to use.

        The result is the smaller of a CPU-based cap
        (``cpu_fraction`` of the logical cores) and a memory-based cap
        (total memory divided by ``memory_per_worker_gb``). Each cap is at
        least 1, so the result is always >= 1. Note that the memory cap is
        derived from *total* memory, not from memory currently free.

        Args:
            memory_per_worker_gb: Assumed memory footprint of one worker in
                GiB. The default of 2 is a heuristic for large datasets.
            cpu_fraction: Fraction of the logical cores to use.

        Returns:
            The worker count as an ``int`` >= 1.

        Raises:
            ValueError: If either tuning parameter is not positive.
        """
        if memory_per_worker_gb <= 0:
            raise ValueError("memory_per_worker_gb must be positive")
        if cpu_fraction <= 0:
            raise ValueError("cpu_fraction must be positive")

        # os.cpu_count() may return None; assume 4 cores in that case.
        cpu_count = os.cpu_count() or 4
        workers = max(1, int(cpu_count * cpu_fraction))

        memory_gb = self._total_memory_gb()
        if memory_gb is not None:
            workers_by_memory = max(1, int(memory_gb / memory_per_worker_gb))
            workers = min(workers, workers_by_memory)

        if self.logging is not None:
            self.logging.info(f"Using {workers} workers for processing")
        return workers