[llvm] 87db8e9 - [OpenMP][Offload] Add SPMD-No-Loop mode to OpenMP offload runtime (#154105)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 28 00:19:18 PDT 2025


Author: Dominik Adamski
Date: 2025-08-28T09:19:14+02:00
New Revision: 87db8e9130e49f6fd3b35ef1e22fd71bf55ef027

URL: https://github.com/llvm/llvm-project/commit/87db8e9130e49f6fd3b35ef1e22fd71bf55ef027
DIFF: https://github.com/llvm/llvm-project/commit/87db8e9130e49f6fd3b35ef1e22fd71bf55ef027.diff

LOG: [OpenMP][Offload] Add SPMD-No-Loop mode to OpenMP offload runtime (#154105)

Kernels which are marked as SPMD-No-Loop should be launched with
sufficient number of teams and threads to cover loop iteration space.

No-Loop mode is described in RFC:

https://discourse.llvm.org/t/rfc-no-loop-mode-for-openmp-gpu-kernels/87517/

Added: 
    

Modified: 
    llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
    offload/DeviceRTL/src/Kernel.cpp
    offload/plugins-nextgen/common/include/PluginInterface.h
    offload/plugins-nextgen/common/src/PluginInterface.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
index 3ae447b14f320..c41b4d1e9844c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h
@@ -23,7 +23,8 @@ enum OMPTgtExecModeFlags : unsigned char {
   OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
   OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
   OMP_TGT_EXEC_MODE_GENERIC_SPMD =
-      OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD
+      OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
+  OMP_TGT_EXEC_MODE_SPMD_NO_LOOP = 1 << 2 | OMP_TGT_EXEC_MODE_SPMD
 };
 
 } // end namespace omp

diff  --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index 467e44a65276c..8c2828b270419 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -30,7 +30,8 @@ enum OMPTgtExecModeFlags : unsigned char {
   OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
   OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
   OMP_TGT_EXEC_MODE_GENERIC_SPMD =
-      OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD
+      OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
+  OMP_TGT_EXEC_MODE_SPMD_NO_LOOP = 1 << 2 | OMP_TGT_EXEC_MODE_SPMD
 };
 
 static void

diff  --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 75f87cab6049d..2c01ed219b29c 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -434,6 +434,8 @@ struct GenericKernelTy {
       return "Generic";
     case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
       return "Generic-SPMD";
+    case OMP_TGT_EXEC_MODE_SPMD_NO_LOOP:
+      return "SPMD-No-Loop";
     }
     llvm_unreachable("Unknown execution mode!");
   }
@@ -471,7 +473,8 @@ struct GenericKernelTy {
                         uint32_t BlockLimitClause[3], uint64_t LoopTripCount,
                         uint32_t &NumThreads, bool IsNumThreadsFromUser) const;
 
-  /// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
+  /// Indicate if the kernel works in Generic SPMD, Generic, No-Loop
+  /// or SPMD mode.
   bool isGenericSPMDMode() const {
     return KernelEnvironment.Configuration.ExecMode ==
            OMP_TGT_EXEC_MODE_GENERIC_SPMD;
@@ -486,6 +489,10 @@ struct GenericKernelTy {
   bool isBareMode() const {
     return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_BARE;
   }
+  bool isNoLoopMode() const {
+    return KernelEnvironment.Configuration.ExecMode ==
+           OMP_TGT_EXEC_MODE_SPMD_NO_LOOP;
+  }
 
   /// The kernel name.
   std::string Name;

diff  --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index d4b5f914c6672..4363a4f7c06ca 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -662,6 +662,10 @@ uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
     return std::min(NumTeamsClause[0], GenericDevice.getBlockLimit());
   }
 
+  // Return the number of teams required to cover the loop iterations.
+  if (isNoLoopMode())
+    return LoopTripCount > 0 ? (((LoopTripCount - 1) / NumThreads) + 1) : 1;
+
   uint64_t DefaultNumBlocks = GenericDevice.getDefaultNumBlocks();
   uint64_t TripCountNumBlocks = std::numeric_limits<uint64_t>::max();
   if (LoopTripCount > 0) {


        


More information about the llvm-commits mailing list