[clang] fae233c - [OpenMP] Avoid initializing the KernelLaunchEnvironment if possible (#73864)

via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 29 14:49:20 PST 2023


Author: Johannes Doerfert
Date: 2023-11-29T14:49:13-08:00
New Revision: fae233c63f93b4b6f9693685abe6c7d24393682f

URL: https://github.com/llvm/llvm-project/commit/fae233c63f93b4b6f9693685abe6c7d24393682f
DIFF: https://github.com/llvm/llvm-project/commit/fae233c63f93b4b6f9693685abe6c7d24393682f.diff

LOG: [OpenMP] Avoid initializing the KernelLaunchEnvironment if possible (#73864)

If a kernel has no team reduction, it does not need a kernel launch
environment (for now). In that case we can avoid the cost of allocating
and initializing one.

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
    openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 7ddc67e8a04ab64..5b9dbbf7e83a968 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -804,7 +804,9 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
       CGM.getTypes().ConvertTypeForMem(StaticTy);
   const auto &DL = CGM.getModule().getDataLayout();
   uint64_t ReductionDataSize =
-      DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
+      TeamsReductions.empty()
+          ? 0
+          : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
   CGBuilderTy &Bld = CGF.Builder;
   OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,
                                 C.getLangOpts().OpenMPCUDAReductionBufNum);

diff  --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 477e0cad06fd50a..2ba9aca9e141a4d 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -464,6 +464,10 @@ GenericKernelTy::getKernelLaunchEnvironment(
   if (isCtorOrDtor() || RecordReplay.isReplaying())
     return nullptr;
 
+  if (!KernelEnvironment.Configuration.ReductionDataSize ||
+      !KernelEnvironment.Configuration.ReductionBufferLength)
+    return reinterpret_cast<KernelLaunchEnvironmentTy *>(~0);
+
   // TODO: Check if the kernel needs a launch environment.
   auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy),
                                             /*HostPtr=*/nullptr,
@@ -478,8 +482,7 @@ GenericKernelTy::getKernelLaunchEnvironment(
   /// async data transfer.
   auto &LocalKLE = (*AsyncInfoWrapper).KernelLaunchEnvironment;
   LocalKLE = KernelLaunchEnvironment;
-  if (KernelEnvironment.Configuration.ReductionDataSize &&
-      KernelEnvironment.Configuration.ReductionBufferLength) {
+  {
     auto AllocOrErr = GenericDevice.dataAlloc(
         KernelEnvironment.Configuration.ReductionDataSize *
             KernelEnvironment.Configuration.ReductionBufferLength,


        


More information about the cfe-commits mailing list