[llvm-branch-commits] [llvm] 4fcb255 - Re-land [Support] On Windows, take the affinity mask into account

Alexandre Ganea via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jan 14 14:08:04 PST 2021


Author: Alexandre Ganea
Date: 2021-01-14T17:03:22-05:00
New Revision: 4fcb25583c3ccbe10c4367d02086269e5fa0bb87

URL: https://github.com/llvm/llvm-project/commit/4fcb25583c3ccbe10c4367d02086269e5fa0bb87
DIFF: https://github.com/llvm/llvm-project/commit/4fcb25583c3ccbe10c4367d02086269e5fa0bb87.diff

LOG: Re-land [Support] On Windows, take the affinity mask into account

The number of hardware threads available to a ThreadPool can be limited if setting an affinity mask.
For example:

    > start /B /AFFINITY 0xF lld-link.exe ...

Would let LLD only use 4 hyper-threads.

Previously, there was an outstanding issue on Windows Server 2019 on dual-CPU machines, which was preventing from using both CPU sockets. In normal conditions, when no affinity mask was set, ProcessorGroup::AllThreads was different from ProcessorGroup::UsableThreads. The previous code in llvm/lib/Support/Windows/Threading.inc L201 was improperly assuming those two values to be equal, and consequently was limiting the execution to only one CPU socket.

Differential Revision: https://reviews.llvm.org/D92419

Added: 
    

Modified: 
    llvm/include/llvm/Support/Program.h
    llvm/lib/Support/Program.cpp
    llvm/lib/Support/Unix/Program.inc
    llvm/lib/Support/Windows/Program.inc
    llvm/lib/Support/Windows/Threading.inc
    llvm/unittests/Support/ThreadPool.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/Program.h b/llvm/include/llvm/Support/Program.h
index b32de47adb57..bfd271958788 100644
--- a/llvm/include/llvm/Support/Program.h
+++ b/llvm/include/llvm/Support/Program.h
@@ -14,6 +14,7 @@
 #define LLVM_SUPPORT_PROGRAM_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Config/llvm-config.h"
@@ -125,9 +126,11 @@ namespace sys {
       ///< string is non-empty upon return an error occurred while invoking the
       ///< program.
       bool *ExecutionFailed = nullptr,
-      Optional<ProcessStatistics> *ProcStat = nullptr ///< If non-zero, provides
-      /// a pointer to a structure in which process execution statistics will be
-      /// stored.
+      Optional<ProcessStatistics> *ProcStat = nullptr, ///< If non-zero,
+      /// provides a pointer to a structure in which process execution
+      /// statistics will be stored.
+      BitVector *AffinityMask = nullptr ///< CPUs or processors the new
+                                        /// program shall run on.
   );
 
   /// Similar to ExecuteAndWait, but returns immediately.
@@ -140,7 +143,8 @@ namespace sys {
                             ArrayRef<Optional<StringRef>> Redirects = {},
                             unsigned MemoryLimit = 0,
                             std::string *ErrMsg = nullptr,
-                            bool *ExecutionFailed = nullptr);
+                            bool *ExecutionFailed = nullptr,
+                            BitVector *AffinityMask = nullptr);
 
   /// Return true if the given arguments fit within system-specific
   /// argument length limits.

diff  --git a/llvm/lib/Support/Program.cpp b/llvm/lib/Support/Program.cpp
index 5294f65bd5a5..c7a59642b27e 100644
--- a/llvm/lib/Support/Program.cpp
+++ b/llvm/lib/Support/Program.cpp
@@ -26,17 +26,20 @@ using namespace sys;
 static bool Execute(ProcessInfo &PI, StringRef Program,
                     ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
                     ArrayRef<Optional<StringRef>> Redirects,
-                    unsigned MemoryLimit, std::string *ErrMsg);
+                    unsigned MemoryLimit, std::string *ErrMsg,
+                    BitVector *AffinityMask);
 
 int sys::ExecuteAndWait(StringRef Program, ArrayRef<StringRef> Args,
                         Optional<ArrayRef<StringRef>> Env,
                         ArrayRef<Optional<StringRef>> Redirects,
                         unsigned SecondsToWait, unsigned MemoryLimit,
                         std::string *ErrMsg, bool *ExecutionFailed,
-                        Optional<ProcessStatistics> *ProcStat) {
+                        Optional<ProcessStatistics> *ProcStat,
+                        BitVector *AffinityMask) {
   assert(Redirects.empty() || Redirects.size() == 3);
   ProcessInfo PI;
-  if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg)) {
+  if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
+              AffinityMask)) {
     if (ExecutionFailed)
       *ExecutionFailed = false;
     ProcessInfo Result =
@@ -55,12 +58,13 @@ ProcessInfo sys::ExecuteNoWait(StringRef Program, ArrayRef<StringRef> Args,
                                Optional<ArrayRef<StringRef>> Env,
                                ArrayRef<Optional<StringRef>> Redirects,
                                unsigned MemoryLimit, std::string *ErrMsg,
-                               bool *ExecutionFailed) {
+                               bool *ExecutionFailed, BitVector *AffinityMask) {
   assert(Redirects.empty() || Redirects.size() == 3);
   ProcessInfo PI;
   if (ExecutionFailed)
     *ExecutionFailed = false;
-  if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg))
+  if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
+               AffinityMask))
     if (ExecutionFailed)
       *ExecutionFailed = true;
 

diff  --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc
index 8f41fc015163..fb56fa4b0d1d 100644
--- a/llvm/lib/Support/Unix/Program.inc
+++ b/llvm/lib/Support/Unix/Program.inc
@@ -174,7 +174,8 @@ toNullTerminatedCStringArray(ArrayRef<StringRef> Strings, StringSaver &Saver) {
 static bool Execute(ProcessInfo &PI, StringRef Program,
                     ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
                     ArrayRef<Optional<StringRef>> Redirects,
-                    unsigned MemoryLimit, std::string *ErrMsg) {
+                    unsigned MemoryLimit, std::string *ErrMsg,
+                    BitVector *AffinityMask) {
   if (!llvm::sys::fs::exists(Program)) {
     if (ErrMsg)
       *ErrMsg = std::string("Executable \"") + Program.str() +
@@ -182,6 +183,9 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
     return false;
   }
 
+  assert(!AffinityMask && "Starting a process with an affinity mask is "
+                          "currently not supported on Unix!");
+
   BumpPtrAllocator Allocator;
   StringSaver Saver(Allocator);
   std::vector<const char *> ArgVector, EnvVector;

diff  --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc
index 91df4488850e..f1d612cf3c98 100644
--- a/llvm/lib/Support/Windows/Program.inc
+++ b/llvm/lib/Support/Windows/Program.inc
@@ -171,7 +171,8 @@ static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
 static bool Execute(ProcessInfo &PI, StringRef Program,
                     ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
                     ArrayRef<Optional<StringRef>> Redirects,
-                    unsigned MemoryLimit, std::string *ErrMsg) {
+                    unsigned MemoryLimit, std::string *ErrMsg,
+                    BitVector *AffinityMask) {
   if (!sys::fs::can_execute(Program)) {
     if (ErrMsg)
       *ErrMsg = "program not executable";
@@ -277,11 +278,15 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
     return false;
   }
 
+  unsigned CreateFlags = CREATE_UNICODE_ENVIRONMENT;
+  if (AffinityMask)
+    CreateFlags |= CREATE_SUSPENDED;
+
   std::vector<wchar_t> CommandUtf16(Command.size() + 1, 0);
   std::copy(Command.begin(), Command.end(), CommandUtf16.begin());
   BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, TRUE,
-                           CREATE_UNICODE_ENVIRONMENT,
-                           EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si, &pi);
+                           CreateFlags, EnvBlock.empty() ? 0 : EnvBlock.data(),
+                           0, &si, &pi);
   DWORD err = GetLastError();
 
   // Regardless of whether the process got created or not, we are done with
@@ -329,6 +334,13 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
     }
   }
 
+  // Set the affinity mask
+  if (AffinityMask) {
+    ::SetProcessAffinityMask(pi.hProcess,
+                             (DWORD_PTR)AffinityMask->getData().front());
+    ::ResumeThread(pi.hThread);
+  }
+
   return true;
 }
 

diff  --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc
index 296e87b77695..6448bb478d0c 100644
--- a/llvm/lib/Support/Windows/Threading.inc
+++ b/llvm/lib/Support/Windows/Threading.inc
@@ -195,14 +195,27 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() {
     if (!IterateProcInfo(RelationProcessorCore, HandleProc))
       return std::vector<ProcessorGroup>();
 
-    // If there's an affinity mask set on one of the CPUs, then assume the user
-    // wants to constrain the current process to only a single CPU.
-    for (auto &G : Groups) {
-      if (G.UsableThreads != G.AllThreads) {
-        ProcessorGroup NewG{G};
+    // If there's an affinity mask set, assume the user wants to constrain the
+    // current process to only a single CPU group. On Windows, it is not
+    // possible for affinity masks to cross CPU group boundaries.
+    DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
+    if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
+                                 &SystemAffinityMask) &&
+        ProcessAffinityMask != SystemAffinityMask) {
+      // We don't expect more that 4 CPU groups on Windows (256 processors).
+      USHORT GroupCount = 4;
+      USHORT GroupArray[4]{};
+      if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
+                                    GroupArray)) {
+        assert(GroupCount == 1 &&
+               "On startup, a program is expected to be assigned only to "
+               "one processor group!");
+        unsigned CurrentGroupID = GroupArray[0];
+        ProcessorGroup NewG{Groups[CurrentGroupID]};
+        NewG.Affinity = ProcessAffinityMask;
+        NewG.UsableThreads = countPopulation(ProcessAffinityMask);
         Groups.clear();
         Groups.push_back(NewG);
-        break;
       }
     }
 

diff  --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp
index 43882d0f3cee..12a2b76036c0 100644
--- a/llvm/unittests/Support/ThreadPool.cpp
+++ b/llvm/unittests/Support/ThreadPool.cpp
@@ -8,11 +8,13 @@
 
 #include "llvm/Support/ThreadPool.h"
 
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/Threading.h"
 
@@ -45,6 +47,14 @@ class ThreadPoolTest : public testing::Test {
     return false;
   }
 
+  bool isWindows() {
+    // FIXME: Skip some tests below on non-Windows because multi-socket systems
+    // were not fully tested on Unix yet, and llvm::get_thread_affinity_mask()
+    // isn't implemented for Unix.
+    Triple Host(Triple::normalize(sys::getProcessTriple()));
+    return Host.isOSWindows();
+  }
+
   ThreadPoolTest() {
     // Add unsupported configuration here, example:
     //   UnsupportedArchs.push_back(Triple::x86_64);
@@ -71,18 +81,24 @@ class ThreadPoolTest : public testing::Test {
 
   void SetUp() override { MainThreadReady = false; }
 
-  void RunOnAllSockets(ThreadPoolStrategy S);
+  std::vector<llvm::BitVector> RunOnAllSockets(ThreadPoolStrategy S);
 
   std::condition_variable WaitMainThread;
   std::mutex WaitMainThreadMutex;
   bool MainThreadReady = false;
 };
 
-#define CHECK_UNSUPPORTED() \
-  do { \
-    if (isUnsupportedOSOrEnvironment()) \
-      return; \
-  } while (0); \
+#define CHECK_UNSUPPORTED()                                                    \
+  do {                                                                         \
+    if (isUnsupportedOSOrEnvironment())                                        \
+      return;                                                                  \
+  } while (0);
+
+#define SKIP_NON_WINDOWS()                                                     \
+  do {                                                                         \
+    if (!isWindows())                                                          \
+      return;                                                                  \
+  } while (0);
 
 TEST_F(ThreadPoolTest, AsyncBarrier) {
   CHECK_UNSUPPORTED();
@@ -169,15 +185,9 @@ TEST_F(ThreadPoolTest, PoolDestruction) {
 
 #if LLVM_ENABLE_THREADS == 1
 
-void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
-  // FIXME: Skip these tests on non-Windows because multi-socket system were not
-  // tested on Unix yet, and llvm::get_thread_affinity_mask() isn't implemented
-  // for Unix.
-  Triple Host(Triple::normalize(sys::getProcessTriple()));
-  if (!Host.isOSWindows())
-    return;
-
-  llvm::DenseSet<llvm::BitVector> ThreadsUsed;
+std::vector<llvm::BitVector>
+ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
+  llvm::SetVector<llvm::BitVector> ThreadsUsed;
   std::mutex Lock;
   {
     std::condition_variable AllThreads;
@@ -198,7 +208,7 @@ void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
         ThreadsUsed.insert(Mask);
       });
     }
-    ASSERT_EQ(true, ThreadsUsed.empty());
+    EXPECT_EQ(true, ThreadsUsed.empty());
     {
       std::unique_lock<std::mutex> Guard(AllThreadsLock);
       AllThreads.wait(Guard,
@@ -206,17 +216,68 @@ void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
     }
     setMainThreadReady();
   }
-  ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
+  return ThreadsUsed.takeVector();
 }
 
 TEST_F(ThreadPoolTest, AllThreads_UseAllRessources) {
   CHECK_UNSUPPORTED();
-  RunOnAllSockets({});
+  SKIP_NON_WINDOWS();
+  std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
+  ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
 }
 
 TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) {
   CHECK_UNSUPPORTED();
-  RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
+  SKIP_NON_WINDOWS();
+  std::vector<llvm::BitVector> ThreadsUsed =
+      RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
+  ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
 }
 
+// From TestMain.cpp.
+extern const char *TestMainArgv0;
+
+// Just a reachable symbol to ease resolving of the executable's path.
+static cl::opt<std::string> ThreadPoolTestStringArg1("thread-pool-string-arg1");
+
+#ifdef _MSC_VER
+#define setenv(name, var, ignore) _putenv_s(name, var)
 #endif
+
+TEST_F(ThreadPoolTest, AffinityMask) {
+  CHECK_UNSUPPORTED();
+
+  // FIXME: implement AffinityMask in Support/Unix/Program.inc
+  SKIP_NON_WINDOWS();
+
+  // Skip this test if less than 4 threads are available.
+  if (llvm::hardware_concurrency().compute_thread_count() < 4)
+    return;
+
+  using namespace llvm::sys;
+  if (getenv("LLVM_THREADPOOL_AFFINITYMASK")) {
+    std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
+    // Ensure the threads only ran on CPUs 0-3.
+    for (auto &It : ThreadsUsed)
+      ASSERT_LT(It.getData().front(), 16UL);
+    return;
+  }
+  std::string Executable =
+      sys::fs::getMainExecutable(TestMainArgv0, &ThreadPoolTestStringArg1);
+  StringRef argv[] = {Executable, "--gtest_filter=ThreadPoolTest.AffinityMask"};
+
+  // Add environment variable to the environment of the child process.
+  int Res = setenv("LLVM_THREADPOOL_AFFINITYMASK", "1", false);
+  ASSERT_EQ(Res, 0);
+
+  std::string Error;
+  bool ExecutionFailed;
+  BitVector Affinity;
+  Affinity.resize(4);
+  Affinity.set(0, 4); // Use CPUs 0,1,2,3.
+  int Ret = sys::ExecuteAndWait(Executable, argv, {}, {}, 0, 0, &Error,
+                                &ExecutionFailed, nullptr, &Affinity);
+  ASSERT_EQ(0, Ret);
+}
+
+#endif // #if LLVM_ENABLE_THREADS == 1


        


More information about the llvm-branch-commits mailing list