[Openmp-commits] [openmp] [OpenMP] Directly use user's grid and block size in kernel language mode (PR #70612)

Shilei Tian via Openmp-commits openmp-commits at lists.llvm.org
Mon Dec 18 09:20:58 PST 2023


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/70612

>From f89b7e588175ca2c1447e5a21dc016dd390570d5 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Mon, 18 Dec 2023 12:20:40 -0500
Subject: [PATCH] [OpenMP] Directly use user's grid and block size in kernel
 language mode

In kernel language mode, use user's grid and blocks size directly. No validity
check, which means if user's values are too large, the launch will fail, similar
to what CUDA and HIP are doing right now.
---
 .../common/include/PluginInterface.h          |  3 ++
 .../common/src/PluginInterface.cpp            |  8 ++++
 .../libomptarget/test/offloading/ompx_bare.c  | 38 +++++++++++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 openmp/libomptarget/test/offloading/ompx_bare.c

diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 716b0ad7843310..28484ae4d5f5ea 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -397,6 +397,9 @@ struct GenericKernelTy {
 
   /// The prototype kernel launch environment.
   KernelLaunchEnvironmentTy KernelLaunchEnvironment;
+
+  /// If the kernel is a bare kernel.
+  bool IsBareKernel = false;
 };
 
 /// Class representing a map of host pinned allocations. We track these pinned
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 1d96468340a083..1c9777dba7a9aa 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -436,6 +436,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
        Name, ErrStr.data());
     assert(KernelEnvironment.Configuration.ReductionDataSize == 0 &&
            "Default initialization failed.");
+    IsBareKernel = true;
   }
 
   // Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
@@ -594,6 +595,10 @@ uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
                                         uint32_t ThreadLimitClause[3]) const {
   assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
+
+  if (IsBareKernel && ThreadLimitClause[0] > 0)
+    return ThreadLimitClause[0];
+
   if (ThreadLimitClause[0] > 0 && isGenericMode())
     ThreadLimitClause[0] += GenericDevice.getWarpSize();
 
@@ -610,6 +615,9 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
   assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
 
+  if (IsBareKernel && NumTeamsClause[0] > 0)
+    return NumTeamsClause[0];
+
   if (NumTeamsClause[0] > 0) {
     // TODO: We need to honor any value and consequently allow more than the
     // block limit. For this we might need to start multiple kernels or let the
diff --git a/openmp/libomptarget/test/offloading/ompx_bare.c b/openmp/libomptarget/test/offloading/ompx_bare.c
new file mode 100644
index 00000000000000..fb3810bd1df126
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/ompx_bare.c
@@ -0,0 +1,38 @@
+// RUN: %libomptarget-compile-generic
+// RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | %fcheck-generic
+//
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+
+#include <assert.h>
+#include <ompx.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+  const int num_blocks = 64;
+  const int block_size = 64;
+  const int N = num_blocks * block_size;
+  int *data = (int *)malloc(N * sizeof(int));
+
+  // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with 64 blocks and 64 threads in SPMD mode
+
+#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(from: data[0:N])
+  {
+    int bid = ompx_block_id_x();
+    int bdim = ompx_block_dim_x();
+    int tid = ompx_thread_id_x();
+    int idx = bid * bdim + tid;
+    data[idx] = idx;
+  }
+
+  for (int i = 0; i < N; ++i)
+    assert(data[i] == i);
+
+  // CHECK: PASS
+  printf("PASS\n");
+
+  return 0;
+}



More information about the Openmp-commits mailing list