[llvm] 577cd6f - [LIT] Workaround the 60 processed limit on Windows (#157759)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 1 03:39:29 PST 2025
Author: Mehdi Amini
Date: 2025-12-01T11:39:25Z
New Revision: 577cd6fb02959270dcdc48864ea0fba1d540cef4
URL: https://github.com/llvm/llvm-project/commit/577cd6fb02959270dcdc48864ea0fba1d540cef4
DIFF: https://github.com/llvm/llvm-project/commit/577cd6fb02959270dcdc48864ea0fba1d540cef4.diff
LOG: [LIT] Workaround the 60 processed limit on Windows (#157759)
On Windows, Python's multiprocessing is limited to at most 60 workers per pool:
https://github.com/python/cpython/blob/6bc65c30ff1fd0b581a2c93416496fc720bc442c/Lib/concurrent/futures/process.py#L669-L672
Because the limit applies per process pool, we can work around it by using
multiple pools on Windows when more workers than that are requested.
Added:
llvm/utils/lit/tests/windows-pools.py
Modified:
llvm/utils/lit/lit/run.py
llvm/utils/lit/lit/util.py
Removed:
################################################################################
diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py
index 3fc4a1b9b40bd..9c54511bfd625 100644
--- a/llvm/utils/lit/lit/run.py
+++ b/llvm/utils/lit/lit/run.py
@@ -7,6 +7,14 @@
import lit.util
import lit.worker
+# Windows has a limit of 60 workers per pool.
+# This is defined in the multiprocessing module implementation.
+# See: https://github.com/python/cpython/blob/6bc65c30ff1fd0b581a2c93416496fc720bc442c/Lib/concurrent/futures/process.py#L669-L672
+WINDOWS_MAX_WORKERS_PER_POOL = 60
+
+
+def _ceilDiv(a, b):
+ return (a + b - 1) // b
class MaxFailuresError(Exception):
pass
@@ -72,25 +80,65 @@ def _execute(self, deadline):
if v is not None
}
- pool = multiprocessing.Pool(
- self.workers, lit.worker.initialize, (self.lit_config, semaphores)
+ # Windows has a limit of 60 workers per pool, so we need to use multiple pools
+ # if we have more workers requested than the limit.
+ # Also, allow to override the limit with the LIT_WINDOWS_MAX_WORKERS_PER_POOL environment variable.
+ max_workers_per_pool = (
+ WINDOWS_MAX_WORKERS_PER_POOL if os.name == "nt" else self.workers
+ )
+ max_workers_per_pool = int(
+ os.getenv("LIT_WINDOWS_MAX_WORKERS_PER_POOL", max_workers_per_pool)
)
- async_results = [
- pool.apply_async(
- lit.worker.execute, args=[test], callback=self.progress_callback
+ num_pools = max(1, _ceilDiv(self.workers, max_workers_per_pool))
+
+ # Distribute self.workers across num_pools as evenly as possible
+ workers_per_pool_list = [self.workers // num_pools] * num_pools
+ for pool_idx in range(self.workers % num_pools):
+ workers_per_pool_list[pool_idx] += 1
+
+ if num_pools > 1:
+ self.lit_config.note(
+ "Using %d pools balancing %d workers total distributed as %s (Windows worker limit workaround)"
+ % (num_pools, self.workers, workers_per_pool_list)
)
- for test in self.tests
- ]
- pool.close()
+
+ # Create multiple pools
+ pools = []
+ for pool_size in workers_per_pool_list:
+ pool = multiprocessing.Pool(
+ pool_size, lit.worker.initialize, (self.lit_config, semaphores)
+ )
+ pools.append(pool)
+
+ # Distribute tests across pools
+ tests_per_pool = _ceilDiv(len(self.tests), num_pools)
+ async_results = []
+
+ for pool_idx, pool in enumerate(pools):
+ start_idx = pool_idx * tests_per_pool
+ end_idx = min(start_idx + tests_per_pool, len(self.tests))
+ for test in self.tests[start_idx:end_idx]:
+ ar = pool.apply_async(
+ lit.worker.execute, args=[test], callback=self.progress_callback
+ )
+ async_results.append(ar)
+
+ # Close all pools
+ for pool in pools:
+ pool.close()
try:
self._wait_for(async_results, deadline)
except:
- pool.terminate()
+ # Terminate all pools on exception
+ for pool in pools:
+ pool.terminate()
raise
finally:
- pool.join()
+ # Join all pools
+ for pool in pools:
+ pool.join()
def _wait_for(self, async_results, deadline):
timeout = deadline - time.time()
diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py
index e4e031b3e0898..6f25fbc94b757 100644
--- a/llvm/utils/lit/lit/util.py
+++ b/llvm/utils/lit/lit/util.py
@@ -114,11 +114,6 @@ def usable_core_count():
except AttributeError:
n = os.cpu_count() or 1
- # On Windows with more than 60 processes, multiprocessing's call to
- # _winapi.WaitForMultipleObjects() prints an error and lit hangs.
- if platform.system() == "Windows":
- return min(n, 60)
-
return n
def abs_path_preserve_drive(path):
diff --git a/llvm/utils/lit/tests/windows-pools.py b/llvm/utils/lit/tests/windows-pools.py
new file mode 100644
index 0000000000000..85110b37c2601
--- /dev/null
+++ b/llvm/utils/lit/tests/windows-pools.py
@@ -0,0 +1,27 @@
+# Create a directory with 20 files and check the number of pools and workers per pool that lit will use.
+
+# RUN: rm -Rf %t.dir && mkdir -p %t.dir
+# RUN: python -c "for i in range(20): open(rf'%t.dir/file{i}.txt', 'w').write('RUN:')"
+
+# RUN: echo "import lit.formats" > %t.dir/lit.cfg
+# RUN: echo "config.name = \"top-level-suite\"" >> %t.dir/lit.cfg
+# RUN: echo "config.suffixes = [\".txt\"]" >> %t.dir/lit.cfg
+# RUN: echo "config.test_format = lit.formats.ShTest()" >> %t.dir/lit.cfg
+
+
+# 15 workers per pool max, 100 workers total max: we expect lit to cap the workers to the number of files
+# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=15" %{lit} -s %t.dir/ -j100 > %t.out 2>&1
+# CHECK: Using 2 pools balancing 20 workers total distributed as [10, 10]
+# CHECK: Passed: 20
+
+# 5 workers per pool max, 17 workers total max
+# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=5" %{lit} -s %t.dir/ -j17 >> %t.out 2>&1
+# CHECK: Using 4 pools balancing 17 workers total distributed as [5, 4, 4, 4]
+# CHECK: Passed: 20
+
+# 19 workers per pool max, 19 workers total max
+# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=19" %{lit} -s %t.dir/ -j19 >> %t.out 2>&1
+# CHECK-NOT: workers total distributed as
+# CHECK: Passed: 20
+
+# RUN: cat %t.out | FileCheck %s
More information about the llvm-commits mailing list