[llvm-branch-commits] [llvm] 4fcb255 - Re-land [Support] On Windows, take the affinity mask into account
Alexandre Ganea via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 14 14:08:04 PST 2021
Author: Alexandre Ganea
Date: 2021-01-14T17:03:22-05:00
New Revision: 4fcb25583c3ccbe10c4367d02086269e5fa0bb87
URL: https://github.com/llvm/llvm-project/commit/4fcb25583c3ccbe10c4367d02086269e5fa0bb87
DIFF: https://github.com/llvm/llvm-project/commit/4fcb25583c3ccbe10c4367d02086269e5fa0bb87.diff
LOG: Re-land [Support] On Windows, take the affinity mask into account
The number of hardware threads available to a ThreadPool can be limited if setting an affinity mask.
For example:
> start /B /AFFINITY 0xF lld-link.exe ...
Would let LLD only use 4 hyper-threads.
Previously, there was an outstanding issue on Windows Server 2019 on dual-CPU machines, which was preventing from using both CPU sockets. In normal conditions, when no affinity mask was set, ProcessorGroup::AllThreads was different from ProcessorGroup::UsableThreads. The previous code in llvm/lib/Support/Windows/Threading.inc L201 was improperly assuming those two values to be equal, and consequently was limiting the execution to only one CPU socket.
Differential Revision: https://reviews.llvm.org/D92419
Added:
Modified:
llvm/include/llvm/Support/Program.h
llvm/lib/Support/Program.cpp
llvm/lib/Support/Unix/Program.inc
llvm/lib/Support/Windows/Program.inc
llvm/lib/Support/Windows/Threading.inc
llvm/unittests/Support/ThreadPool.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/Program.h b/llvm/include/llvm/Support/Program.h
index b32de47adb57..bfd271958788 100644
--- a/llvm/include/llvm/Support/Program.h
+++ b/llvm/include/llvm/Support/Program.h
@@ -14,6 +14,7 @@
#define LLVM_SUPPORT_PROGRAM_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
@@ -125,9 +126,11 @@ namespace sys {
///< string is non-empty upon return an error occurred while invoking the
///< program.
bool *ExecutionFailed = nullptr,
- Optional<ProcessStatistics> *ProcStat = nullptr ///< If non-zero, provides
- /// a pointer to a structure in which process execution statistics will be
- /// stored.
+ Optional<ProcessStatistics> *ProcStat = nullptr, ///< If non-zero,
+ /// provides a pointer to a structure in which process execution
+ /// statistics will be stored.
+ BitVector *AffinityMask = nullptr ///< CPUs or processors the new
+ /// program shall run on.
);
/// Similar to ExecuteAndWait, but returns immediately.
@@ -140,7 +143,8 @@ namespace sys {
ArrayRef<Optional<StringRef>> Redirects = {},
unsigned MemoryLimit = 0,
std::string *ErrMsg = nullptr,
- bool *ExecutionFailed = nullptr);
+ bool *ExecutionFailed = nullptr,
+ BitVector *AffinityMask = nullptr);
/// Return true if the given arguments fit within system-specific
/// argument length limits.
diff --git a/llvm/lib/Support/Program.cpp b/llvm/lib/Support/Program.cpp
index 5294f65bd5a5..c7a59642b27e 100644
--- a/llvm/lib/Support/Program.cpp
+++ b/llvm/lib/Support/Program.cpp
@@ -26,17 +26,20 @@ using namespace sys;
static bool Execute(ProcessInfo &PI, StringRef Program,
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
- unsigned MemoryLimit, std::string *ErrMsg);
+ unsigned MemoryLimit, std::string *ErrMsg,
+ BitVector *AffinityMask);
int sys::ExecuteAndWait(StringRef Program, ArrayRef<StringRef> Args,
Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
unsigned SecondsToWait, unsigned MemoryLimit,
std::string *ErrMsg, bool *ExecutionFailed,
- Optional<ProcessStatistics> *ProcStat) {
+ Optional<ProcessStatistics> *ProcStat,
+ BitVector *AffinityMask) {
assert(Redirects.empty() || Redirects.size() == 3);
ProcessInfo PI;
- if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg)) {
+ if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
+ AffinityMask)) {
if (ExecutionFailed)
*ExecutionFailed = false;
ProcessInfo Result =
@@ -55,12 +58,13 @@ ProcessInfo sys::ExecuteNoWait(StringRef Program, ArrayRef<StringRef> Args,
Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
unsigned MemoryLimit, std::string *ErrMsg,
- bool *ExecutionFailed) {
+ bool *ExecutionFailed, BitVector *AffinityMask) {
assert(Redirects.empty() || Redirects.size() == 3);
ProcessInfo PI;
if (ExecutionFailed)
*ExecutionFailed = false;
- if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg))
+ if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
+ AffinityMask))
if (ExecutionFailed)
*ExecutionFailed = true;
diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc
index 8f41fc015163..fb56fa4b0d1d 100644
--- a/llvm/lib/Support/Unix/Program.inc
+++ b/llvm/lib/Support/Unix/Program.inc
@@ -174,7 +174,8 @@ toNullTerminatedCStringArray(ArrayRef<StringRef> Strings, StringSaver &Saver) {
static bool Execute(ProcessInfo &PI, StringRef Program,
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
- unsigned MemoryLimit, std::string *ErrMsg) {
+ unsigned MemoryLimit, std::string *ErrMsg,
+ BitVector *AffinityMask) {
if (!llvm::sys::fs::exists(Program)) {
if (ErrMsg)
*ErrMsg = std::string("Executable \"") + Program.str() +
@@ -182,6 +183,9 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
return false;
}
+ assert(!AffinityMask && "Starting a process with an affinity mask is "
+ "currently not supported on Unix!");
+
BumpPtrAllocator Allocator;
StringSaver Saver(Allocator);
std::vector<const char *> ArgVector, EnvVector;
diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc
index 91df4488850e..f1d612cf3c98 100644
--- a/llvm/lib/Support/Windows/Program.inc
+++ b/llvm/lib/Support/Windows/Program.inc
@@ -171,7 +171,8 @@ static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
static bool Execute(ProcessInfo &PI, StringRef Program,
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
ArrayRef<Optional<StringRef>> Redirects,
- unsigned MemoryLimit, std::string *ErrMsg) {
+ unsigned MemoryLimit, std::string *ErrMsg,
+ BitVector *AffinityMask) {
if (!sys::fs::can_execute(Program)) {
if (ErrMsg)
*ErrMsg = "program not executable";
@@ -277,11 +278,15 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
return false;
}
+ unsigned CreateFlags = CREATE_UNICODE_ENVIRONMENT;
+ if (AffinityMask)
+ CreateFlags |= CREATE_SUSPENDED;
+
std::vector<wchar_t> CommandUtf16(Command.size() + 1, 0);
std::copy(Command.begin(), Command.end(), CommandUtf16.begin());
BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, TRUE,
- CREATE_UNICODE_ENVIRONMENT,
- EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si, &pi);
+ CreateFlags, EnvBlock.empty() ? 0 : EnvBlock.data(),
+ 0, &si, &pi);
DWORD err = GetLastError();
// Regardless of whether the process got created or not, we are done with
@@ -329,6 +334,13 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
}
}
+ // Set the affinity mask
+ if (AffinityMask) {
+ ::SetProcessAffinityMask(pi.hProcess,
+ (DWORD_PTR)AffinityMask->getData().front());
+ ::ResumeThread(pi.hThread);
+ }
+
return true;
}
diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc
index 296e87b77695..6448bb478d0c 100644
--- a/llvm/lib/Support/Windows/Threading.inc
+++ b/llvm/lib/Support/Windows/Threading.inc
@@ -195,14 +195,27 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() {
if (!IterateProcInfo(RelationProcessorCore, HandleProc))
return std::vector<ProcessorGroup>();
- // If there's an affinity mask set on one of the CPUs, then assume the user
- // wants to constrain the current process to only a single CPU.
- for (auto &G : Groups) {
- if (G.UsableThreads != G.AllThreads) {
- ProcessorGroup NewG{G};
+ // If there's an affinity mask set, assume the user wants to constrain the
+ // current process to only a single CPU group. On Windows, it is not
+ // possible for affinity masks to cross CPU group boundaries.
+ DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
+ if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
+ &SystemAffinityMask) &&
+ ProcessAffinityMask != SystemAffinityMask) {
+ // We don't expect more that 4 CPU groups on Windows (256 processors).
+ USHORT GroupCount = 4;
+ USHORT GroupArray[4]{};
+ if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
+ GroupArray)) {
+ assert(GroupCount == 1 &&
+ "On startup, a program is expected to be assigned only to "
+ "one processor group!");
+ unsigned CurrentGroupID = GroupArray[0];
+ ProcessorGroup NewG{Groups[CurrentGroupID]};
+ NewG.Affinity = ProcessAffinityMask;
+ NewG.UsableThreads = countPopulation(ProcessAffinityMask);
Groups.clear();
Groups.push_back(NewG);
- break;
}
}
diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp
index 43882d0f3cee..12a2b76036c0 100644
--- a/llvm/unittests/Support/ThreadPool.cpp
+++ b/llvm/unittests/Support/ThreadPool.cpp
@@ -8,11 +8,13 @@
#include "llvm/Support/ThreadPool.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Threading.h"
@@ -45,6 +47,14 @@ class ThreadPoolTest : public testing::Test {
return false;
}
+ bool isWindows() {
+ // FIXME: Skip some tests below on non-Windows because multi-socket systems
+ // were not fully tested on Unix yet, and llvm::get_thread_affinity_mask()
+ // isn't implemented for Unix.
+ Triple Host(Triple::normalize(sys::getProcessTriple()));
+ return Host.isOSWindows();
+ }
+
ThreadPoolTest() {
// Add unsupported configuration here, example:
// UnsupportedArchs.push_back(Triple::x86_64);
@@ -71,18 +81,24 @@ class ThreadPoolTest : public testing::Test {
void SetUp() override { MainThreadReady = false; }
- void RunOnAllSockets(ThreadPoolStrategy S);
+ std::vector<llvm::BitVector> RunOnAllSockets(ThreadPoolStrategy S);
std::condition_variable WaitMainThread;
std::mutex WaitMainThreadMutex;
bool MainThreadReady = false;
};
-#define CHECK_UNSUPPORTED() \
- do { \
- if (isUnsupportedOSOrEnvironment()) \
- return; \
- } while (0); \
+#define CHECK_UNSUPPORTED() \
+ do { \
+ if (isUnsupportedOSOrEnvironment()) \
+ return; \
+ } while (0);
+
+#define SKIP_NON_WINDOWS() \
+ do { \
+ if (!isWindows()) \
+ return; \
+ } while (0);
TEST_F(ThreadPoolTest, AsyncBarrier) {
CHECK_UNSUPPORTED();
@@ -169,15 +185,9 @@ TEST_F(ThreadPoolTest, PoolDestruction) {
#if LLVM_ENABLE_THREADS == 1
-void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
- // FIXME: Skip these tests on non-Windows because multi-socket system were not
- // tested on Unix yet, and llvm::get_thread_affinity_mask() isn't implemented
- // for Unix.
- Triple Host(Triple::normalize(sys::getProcessTriple()));
- if (!Host.isOSWindows())
- return;
-
- llvm::DenseSet<llvm::BitVector> ThreadsUsed;
+std::vector<llvm::BitVector>
+ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
+ llvm::SetVector<llvm::BitVector> ThreadsUsed;
std::mutex Lock;
{
std::condition_variable AllThreads;
@@ -198,7 +208,7 @@ void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
ThreadsUsed.insert(Mask);
});
}
- ASSERT_EQ(true, ThreadsUsed.empty());
+ EXPECT_EQ(true, ThreadsUsed.empty());
{
std::unique_lock<std::mutex> Guard(AllThreadsLock);
AllThreads.wait(Guard,
@@ -206,17 +216,68 @@ void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
}
setMainThreadReady();
}
- ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
+ return ThreadsUsed.takeVector();
}
TEST_F(ThreadPoolTest, AllThreads_UseAllRessources) {
CHECK_UNSUPPORTED();
- RunOnAllSockets({});
+ SKIP_NON_WINDOWS();
+ std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
+ ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
}
TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) {
CHECK_UNSUPPORTED();
- RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
+ SKIP_NON_WINDOWS();
+ std::vector<llvm::BitVector> ThreadsUsed =
+ RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
+ ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
}
+// From TestMain.cpp.
+extern const char *TestMainArgv0;
+
+// Just a reachable symbol to ease resolving of the executable's path.
+static cl::opt<std::string> ThreadPoolTestStringArg1("thread-pool-string-arg1");
+
+#ifdef _MSC_VER
+#define setenv(name, var, ignore) _putenv_s(name, var)
#endif
+
+TEST_F(ThreadPoolTest, AffinityMask) {
+ CHECK_UNSUPPORTED();
+
+ // FIXME: implement AffinityMask in Support/Unix/Program.inc
+ SKIP_NON_WINDOWS();
+
+ // Skip this test if less than 4 threads are available.
+ if (llvm::hardware_concurrency().compute_thread_count() < 4)
+ return;
+
+ using namespace llvm::sys;
+ if (getenv("LLVM_THREADPOOL_AFFINITYMASK")) {
+ std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
+ // Ensure the threads only ran on CPUs 0-3.
+ for (auto &It : ThreadsUsed)
+ ASSERT_LT(It.getData().front(), 16UL);
+ return;
+ }
+ std::string Executable =
+ sys::fs::getMainExecutable(TestMainArgv0, &ThreadPoolTestStringArg1);
+ StringRef argv[] = {Executable, "--gtest_filter=ThreadPoolTest.AffinityMask"};
+
+ // Add environment variable to the environment of the child process.
+ int Res = setenv("LLVM_THREADPOOL_AFFINITYMASK", "1", false);
+ ASSERT_EQ(Res, 0);
+
+ std::string Error;
+ bool ExecutionFailed;
+ BitVector Affinity;
+ Affinity.resize(4);
+ Affinity.set(0, 4); // Use CPUs 0,1,2,3.
+ int Ret = sys::ExecuteAndWait(Executable, argv, {}, {}, 0, 0, &Error,
+ &ExecutionFailed, nullptr, &Affinity);
+ ASSERT_EQ(0, Ret);
+}
+
+#endif // #if LLVM_ENABLE_THREADS == 1
More information about the llvm-branch-commits
mailing list