[llvm] [LIT] Workaround the 60-process limit on Windows (PR #157759)

Mehdi Amini via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 17 00:51:22 PDT 2025


https://github.com/joker-eph updated https://github.com/llvm/llvm-project/pull/157759

>From 1930ea44433d94cfef79fd338baef3e889053c2a Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Tue, 9 Sep 2025 15:11:11 -0700
Subject: [PATCH 1/2] [LIT] Workaround the 60-process limit on Windows

On Windows, Python multiprocessing is limited to at most 60 workers per pool:

https://github.com/python/cpython/blob/6bc65c30ff1fd0b581a2c93416496fc720bc442c/Lib/concurrent/futures/process.py#L669-L672

Since the limit applies per pool, we can work around it on Windows by
spreading the requested workers across multiple pools whenever more than
60 workers are needed.
---
 llvm/utils/lit/lit/run.py  | 53 +++++++++++++++++++++++++++++++-------
 llvm/utils/lit/lit/util.py |  5 ----
 2 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py
index 62070e824e87f..5109aad97eb8f 100644
--- a/llvm/utils/lit/lit/run.py
+++ b/llvm/utils/lit/lit/run.py
@@ -72,25 +72,58 @@ def _execute(self, deadline):
             if v is not None
         }
 
-        pool = multiprocessing.Pool(
-            self.workers, lit.worker.initialize, (self.lit_config, semaphores)
+        # Windows has a limit of 60 workers per pool, so we need to use multiple pools
+        # if we have more than 60 workers requested
+        max_workers_per_pool = 60 if os.name == "nt" else self.workers
+        num_pools = max(
+            1, (self.workers + max_workers_per_pool - 1) // max_workers_per_pool
         )
+        workers_per_pool = min(self.workers, max_workers_per_pool)
 
-        async_results = [
-            pool.apply_async(
-                lit.worker.execute, args=[test], callback=self.progress_callback
+        if num_pools > 1:
+            self.lit_config.note(
+                "Using %d pools with %d workers each (Windows worker limit workaround)"
+                % (num_pools, workers_per_pool)
             )
-            for test in self.tests
-        ]
-        pool.close()
+
+        # Create multiple pools
+        pools = []
+        for i in range(num_pools):
+            pool = multiprocessing.Pool(
+                workers_per_pool, lit.worker.initialize, (self.lit_config, semaphores)
+            )
+            pools.append(pool)
+
+        # Distribute tests across pools
+        tests_per_pool = (len(self.tests) + num_pools - 1) // num_pools
+        async_results = []
+
+        for pool_idx, pool in enumerate(pools):
+            start_idx = pool_idx * tests_per_pool
+            end_idx = min(start_idx + tests_per_pool, len(self.tests))
+            pool_tests = self.tests[start_idx:end_idx]
+
+            for test in pool_tests:
+                ar = pool.apply_async(
+                    lit.worker.execute, args=[test], callback=self.progress_callback
+                )
+                async_results.append(ar)
+
+        # Close all pools
+        for pool in pools:
+            pool.close()
 
         try:
             self._wait_for(async_results, deadline)
         except:
-            pool.terminate()
+            # Terminate all pools on exception
+            for pool in pools:
+                pool.terminate()
             raise
         finally:
-            pool.join()
+            # Join all pools
+            for pool in pools:
+                pool.join()
 
     def _wait_for(self, async_results, deadline):
         timeout = deadline - time.time()
diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py
index ce4c3c2df3436..518c1a3029b86 100644
--- a/llvm/utils/lit/lit/util.py
+++ b/llvm/utils/lit/lit/util.py
@@ -113,11 +113,6 @@ def usable_core_count():
     except AttributeError:
         n = os.cpu_count() or 1
 
-    # On Windows with more than 60 processes, multiprocessing's call to
-    # _winapi.WaitForMultipleObjects() prints an error and lit hangs.
-    if platform.system() == "Windows":
-        return min(n, 60)
-
     return n
 
 def abs_path_preserve_drive(path):

>From 19353445093cb006b1a7e499d1d00b4d764669d3 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Tue, 16 Sep 2025 13:20:52 -0700
Subject: [PATCH 2/2] Distribute workers more evenly

---
 llvm/utils/lit/lit/run.py | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py
index 5109aad97eb8f..9654b0d11578f 100644
--- a/llvm/utils/lit/lit/run.py
+++ b/llvm/utils/lit/lit/run.py
@@ -7,6 +7,14 @@
 import lit.util
 import lit.worker
 
+# Windows has a limit of 60 workers per pool.
+# This is defined in the multiprocessing module implementation.
+# See: https://github.com/python/cpython/blob/6bc65c30ff1fd0b581a2c93416496fc720bc442c/Lib/concurrent/futures/process.py#L669-L672
+WINDOWS_MAX_WORKERS_PER_POOL = 60
+
+
+def _ceilDiv(a, b):
+    return (a + b - 1) // b
 
 class MaxFailuresError(Exception):
     pass
@@ -73,37 +81,39 @@ def _execute(self, deadline):
         }
 
         # Windows has a limit of 60 workers per pool, so we need to use multiple pools
-        # if we have more than 60 workers requested
-        max_workers_per_pool = 60 if os.name == "nt" else self.workers
-        num_pools = max(
-            1, (self.workers + max_workers_per_pool - 1) // max_workers_per_pool
+        # if we have more workers requested than the limit.
+        max_workers_per_pool = (
+            WINDOWS_MAX_WORKERS_PER_POOL if os.name == "nt" else self.workers
         )
-        workers_per_pool = min(self.workers, max_workers_per_pool)
+        num_pools = max(1, _ceilDiv(self.workers, max_workers_per_pool))
+
+        # Distribute self.workers across num_pools as evenly as possible
+        workers_per_pool_list = [self.workers // num_pools] * num_pools
+        for pool_idx in range(self.workers % num_pools):
+            workers_per_pool_list[pool_idx] += 1
 
         if num_pools > 1:
             self.lit_config.note(
-                "Using %d pools with %d workers each (Windows worker limit workaround)"
-                % (num_pools, workers_per_pool)
+                "Using %d pools balancing %d workers total distributed as %s (Windows worker limit workaround)"
+                % (num_pools, self.workers, workers_per_pool_list)
             )
 
         # Create multiple pools
         pools = []
-        for i in range(num_pools):
+        for pool_size in workers_per_pool_list:
             pool = multiprocessing.Pool(
-                workers_per_pool, lit.worker.initialize, (self.lit_config, semaphores)
+                pool_size, lit.worker.initialize, (self.lit_config, semaphores)
             )
             pools.append(pool)
 
         # Distribute tests across pools
-        tests_per_pool = (len(self.tests) + num_pools - 1) // num_pools
+        tests_per_pool = _ceilDiv(len(self.tests), num_pools)
         async_results = []
 
         for pool_idx, pool in enumerate(pools):
             start_idx = pool_idx * tests_per_pool
             end_idx = min(start_idx + tests_per_pool, len(self.tests))
-            pool_tests = self.tests[start_idx:end_idx]
-
-            for test in pool_tests:
+            for test in self.tests[start_idx:end_idx]:
                 ar = pool.apply_async(
                     lit.worker.execute, args=[test], callback=self.progress_callback
                 )



More information about the llvm-commits mailing list