[clang] fae233c - [OpenMP] Avoid initializing the KernelLaunchEnvironment if possible (#73864)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 29 14:49:20 PST 2023
Author: Johannes Doerfert
Date: 2023-11-29T14:49:13-08:00
New Revision: fae233c63f93b4b6f9693685abe6c7d24393682f
URL: https://github.com/llvm/llvm-project/commit/fae233c63f93b4b6f9693685abe6c7d24393682f
DIFF: https://github.com/llvm/llvm-project/commit/fae233c63f93b4b6f9693685abe6c7d24393682f.diff
LOG: [OpenMP] Avoid initializing the KernelLaunchEnvironment if possible (#73864)
If we don't have a team reduction, we don't need a kernel launch
environment (for now). In that case, we can avoid the cost of allocating
and initializing it.
Added:
Modified:
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 7ddc67e8a04ab64..5b9dbbf7e83a968 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -804,7 +804,9 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
CGM.getTypes().ConvertTypeForMem(StaticTy);
const auto &DL = CGM.getModule().getDataLayout();
uint64_t ReductionDataSize =
- DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
+ TeamsReductions.empty()
+ ? 0
+ : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
CGBuilderTy &Bld = CGF.Builder;
OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,
C.getLangOpts().OpenMPCUDAReductionBufNum);
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 477e0cad06fd50a..2ba9aca9e141a4d 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -464,6 +464,10 @@ GenericKernelTy::getKernelLaunchEnvironment(
if (isCtorOrDtor() || RecordReplay.isReplaying())
return nullptr;
+ if (!KernelEnvironment.Configuration.ReductionDataSize ||
+ !KernelEnvironment.Configuration.ReductionBufferLength)
+ return reinterpret_cast<KernelLaunchEnvironmentTy *>(~0);
+
// TODO: Check if the kernel needs a launch environment.
auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy),
/*HostPtr=*/nullptr,
@@ -478,8 +482,7 @@ GenericKernelTy::getKernelLaunchEnvironment(
/// async data transfer.
auto &LocalKLE = (*AsyncInfoWrapper).KernelLaunchEnvironment;
LocalKLE = KernelLaunchEnvironment;
- if (KernelEnvironment.Configuration.ReductionDataSize &&
- KernelEnvironment.Configuration.ReductionBufferLength) {
+ {
auto AllocOrErr = GenericDevice.dataAlloc(
KernelEnvironment.Configuration.ReductionDataSize *
KernelEnvironment.Configuration.ReductionBufferLength,
More information about the cfe-commits mailing list