[Openmp-commits] [openmp] 3768039 - [OpenMP] Directly use user's grid and block size in kernel language mode (#70612)

via Openmp-commits openmp-commits at lists.llvm.org
Mon Dec 18 09:26:22 PST 2023


Author: Shilei Tian
Date: 2023-12-18T12:26:18-05:00
New Revision: 3768039913be32666a316a2b5c12739c423dbc61

URL: https://github.com/llvm/llvm-project/commit/3768039913be32666a316a2b5c12739c423dbc61
DIFF: https://github.com/llvm/llvm-project/commit/3768039913be32666a316a2b5c12739c423dbc61.diff

LOG: [OpenMP] Directly use user's grid and block size in kernel language mode (#70612)

In kernel language mode, use user's grid and blocks size directly. No
validity
check, which means if user's values are too large, the launch will fail,
similar
to what CUDA and HIP are doing right now.

Added: 
    openmp/libomptarget/test/offloading/ompx_bare.c

Modified: 
    openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
    openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 716b0ad7843310..28484ae4d5f5ea 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -397,6 +397,9 @@ struct GenericKernelTy {
 
   /// The prototype kernel launch environment.
   KernelLaunchEnvironmentTy KernelLaunchEnvironment;
+
+  /// If the kernel is a bare kernel.
+  bool IsBareKernel = false;
 };
 
 /// Class representing a map of host pinned allocations. We track these pinned

diff  --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 1d96468340a083..1c9777dba7a9aa 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -436,6 +436,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
        Name, ErrStr.data());
     assert(KernelEnvironment.Configuration.ReductionDataSize == 0 &&
            "Default initialization failed.");
+    IsBareKernel = true;
   }
 
   // Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
@@ -594,6 +595,10 @@ uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
                                         uint32_t ThreadLimitClause[3]) const {
   assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
+
+  if (IsBareKernel && ThreadLimitClause[0] > 0)
+    return ThreadLimitClause[0];
+
   if (ThreadLimitClause[0] > 0 && isGenericMode())
     ThreadLimitClause[0] += GenericDevice.getWarpSize();
 
@@ -610,6 +615,9 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
   assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
 
+  if (IsBareKernel && NumTeamsClause[0] > 0)
+    return NumTeamsClause[0];
+
   if (NumTeamsClause[0] > 0) {
     // TODO: We need to honor any value and consequently allow more than the
     // block limit. For this we might need to start multiple kernels or let the

diff  --git a/openmp/libomptarget/test/offloading/ompx_bare.c b/openmp/libomptarget/test/offloading/ompx_bare.c
new file mode 100644
index 00000000000000..fb3810bd1df126
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/ompx_bare.c
@@ -0,0 +1,38 @@
+// RUN: %libomptarget-compile-generic
+// RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | %fcheck-generic
+//
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+
+#include <assert.h>
+#include <ompx.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+  const int num_blocks = 64;
+  const int block_size = 64;
+  const int N = num_blocks * block_size;
+  int *data = (int *)malloc(N * sizeof(int));
+
+  // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with 64 blocks and 64 threads in SPMD mode
+
+#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(from: data[0:N])
+  {
+    int bid = ompx_block_id_x();
+    int bdim = ompx_block_dim_x();
+    int tid = ompx_thread_id_x();
+    int idx = bid * bdim + tid;
+    data[idx] = idx;
+  }
+
+  for (int i = 0; i < N; ++i)
+    assert(data[i] == i);
+
+  // CHECK: PASS
+  printf("PASS\n");
+
+  return 0;
+}


        


More information about the Openmp-commits mailing list