[Openmp-commits] [openmp] 428bc51 - [OpenMP] Unify "exec_mode" query code and default to SPMD
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jan 3 17:10:22 PST 2023
Author: Johannes Doerfert
Date: 2023-01-03T16:58:13-08:00
New Revision: 428bc510bf50f1517b4216c1499885d82c8bce9a
URL: https://github.com/llvm/llvm-project/commit/428bc510bf50f1517b4216c1499885d82c8bce9a
DIFF: https://github.com/llvm/llvm-project/commit/428bc510bf50f1517b4216c1499885d82c8bce9a.diff
LOG: [OpenMP] Unify "exec_mode" query code and default to SPMD
Defaulting to Generic mode doesn't make much sense, as a kernel needs to be
explicitly prepared for it. SPMD mode is the "native" execution mode, e.g., for
"bare" kernels. It is also the execution mode used for constructors and
destructors (as we might otherwise launch an extra warp for them).
Differential Revision: https://reviews.llvm.org/D140718
Added:
Modified:
openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 2d1ca52aad227..53c5a20c3680e 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1630,31 +1630,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Expected<GenericKernelTy *>
constructKernelEntry(const __tgt_offload_entry &KernelEntry,
DeviceImageTy &Image) override {
- // Create a metadata object for the exec mode global (auto-generated).
- StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(
- KernelEntry.name, "_exec_mode");
-
- // Retrieve execution mode for the kernel. This may fail since some kernels
- // may not have a execution mode.
- GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
- if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
- DP("Failed to read execution mode for '%s': %s\n"
- "Using default GENERIC (1) execution mode\n",
- KernelEntry.name, toString(std::move(Err)).data());
- // Consume the error since it is acceptable to fail.
- consumeError(std::move(Err));
- // In some cases the execution mode is not included, so use the default.
- ExecModeGlobal.setValue(llvm::omp::OMP_TGT_EXEC_MODE_GENERIC);
- }
- // Check that the retrieved execution mode is valid.
- if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
- return Plugin::error("Invalid execution mode %d for '%s'",
- ExecModeGlobal.getValue(), KernelEntry.name);
+ Expected<OMPTgtExecModeFlags> ExecModeOrErr =
+ getExecutionModeForKernel(KernelEntry.name, Image);
+ if (!ExecModeOrErr)
+ return ExecModeOrErr.takeError();
// Allocate and initialize the AMDGPU kernel.
AMDGPUKernelTy *AMDKernel = Plugin::get().allocate<AMDGPUKernelTy>();
- new (AMDKernel) AMDGPUKernelTy(KernelEntry.name, ExecModeGlobal.getValue());
+ new (AMDKernel) AMDGPUKernelTy(KernelEntry.name, ExecModeOrErr.get());
return AMDKernel;
}
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 4d73ab69af5f0..8f71a383ca8df 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -16,6 +16,9 @@
#include "omptarget.h"
#include "omptargetplugin.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Support/Error.h"
+
#include <cstdint>
#include <limits>
@@ -357,6 +360,34 @@ Error GenericDeviceTy::registerKernelOffloadEntry(
return Plugin::success();
}
+Expected<OMPTgtExecModeFlags>
+GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
+ DeviceImageTy &Image) {
+ // Create a metadata object for the exec mode global (auto-generated).
+ StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(Name.data(),
+ "_exec_mode");
+
+ // Retrieve execution mode for the kernel. This may fail since some kernels
+ // may not have an execution mode.
+ GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
+ if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
+ // Consume the error since it is acceptable to fail.
+ [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
+ DP("Failed to read execution mode for '%s': %s\n"
+ "Using default SPMD (2) execution mode\n",
+ Name.data(), ErrStr.data());
+
+ return OMP_TGT_EXEC_MODE_SPMD;
+ }
+
+ // Check that the retrieved execution mode is valid.
+ if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
+ return Plugin::error("Invalid execution mode %d for '%s'",
+ ExecModeGlobal.getValue(), Name.data());
+
+ return ExecModeGlobal.getValue();
+}
+
Error GenericDeviceTy::registerHostPinnedMemoryBuffer(const void *Buffer,
size_t Size) {
std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
index 836eb81e6eae2..774a03474dd02 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -472,6 +472,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
return ((const char *)It->first + It->second > (const char *)Buffer);
}
+ /// Return the execution mode used for kernel \p Name.
+ Expected<OMPTgtExecModeFlags> getExecutionModeForKernel(StringRef Name,
+ DeviceImageTy &Image);
+
/// Environment variables defined by the LLVM OpenMP implementation
/// regarding the initial number of streams and events.
UInt32Envar OMPX_InitialNumStreams;
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index 5b9fc7786c5b9..cb5f004d81046 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -24,6 +24,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
+#include "llvm/Support/Error.h"
namespace llvm {
namespace omp {
@@ -348,33 +349,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
DP("Entry point " DPxMOD " maps to %s (" DPxMOD ")\n", DPxPTR(&KernelEntry),
KernelEntry.name, DPxPTR(Func));
- // Create a metadata object for the exec mode global (auto-generated).
- StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(
- KernelEntry.name, "_exec_mode");
-
- // Retrieve execution mode for the kernel. This may fail since some kernels
- // may not have a execution mode.
- GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
- if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
- // In some cases the execution mode is not included, so use the default.
- ExecModeGlobal.setValue(llvm::omp::OMP_TGT_EXEC_MODE_GENERIC);
- // Consume the error since it is acceptable to fail.
- [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
-
- DP("Failed to read execution mode for '%s': %s\n"
- "Using default GENERIC (1) execution mode\n",
- KernelEntry.name, ErrStr.data());
- }
-
- // Check that the retrieved execution mode is valid.
- if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
- return Plugin::error("Invalid execution mode %d for '%s'",
- ExecModeGlobal.getValue(), KernelEntry.name);
+ Expected<OMPTgtExecModeFlags> ExecModeOrErr =
+ getExecutionModeForKernel(KernelEntry.name, Image);
+ if (!ExecModeOrErr)
+ return ExecModeOrErr.takeError();
// Allocate and initialize the CUDA kernel.
CUDAKernelTy *CUDAKernel = Plugin::get().allocate<CUDAKernelTy>();
- new (CUDAKernel)
- CUDAKernelTy(KernelEntry.name, ExecModeGlobal.getValue(), Func);
+ new (CUDAKernel) CUDAKernelTy(KernelEntry.name, ExecModeOrErr.get(), Func);
return CUDAKernel;
}
More information about the Openmp-commits
mailing list