[Openmp-commits] [openmp] daef6d3 - [OpenMP] Introduce ompx.h and 3D wrappers (threadId, threadDim, ...)

Johannes Doerfert via Openmp-commits openmp-commits at lists.llvm.org
Mon Jul 31 13:45:32 PDT 2023


Author: Johannes Doerfert
Date: 2023-07-31T13:44:51-07:00
New Revision: daef6d327aa3c952c91e54a22de4be1db191f911

URL: https://github.com/llvm/llvm-project/commit/daef6d327aa3c952c91e54a22de4be1db191f911
DIFF: https://github.com/llvm/llvm-project/commit/daef6d327aa3c952c91e54a22de4be1db191f911.diff

LOG: [OpenMP] Introduce ompx.h and 3D wrappers (threadId, threadDim, ...)

The new ompx.h header will give us a place to put extensions. The first
are 3D getters for the common cuda values:
  `{threadId,threadDim,blockId,blockDim}.{x,y,z}`

Differential Revision: https://reviews.llvm.org/D156501

Added: 
    openmp/libomptarget/test/api/ompx_3d.c
    openmp/libomptarget/test/api/ompx_3d.cpp
    openmp/runtime/src/include/ompx.h.var

Modified: 
    openmp/libomptarget/DeviceRTL/src/Mapping.cpp
    openmp/libomptarget/DeviceRTL/src/exports
    openmp/runtime/cmake/LibompExports.cmake
    openmp/runtime/src/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index 8f26af086e714d..2f50530e79a1d0 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -372,4 +372,12 @@ __attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
 }
 }
 
+#define _TGT_KERNEL_LANGUAGE(NAME, MAPPER_NAME)                                \
+  extern "C" int ompx_##NAME(int Dim) { return mapping::MAPPER_NAME(Dim); }
+
+_TGT_KERNEL_LANGUAGE(thread_id, getThreadIdInBlock)
+_TGT_KERNEL_LANGUAGE(thread_dim, getNumberOfThreadsInBlock)
+_TGT_KERNEL_LANGUAGE(block_id, getBlockIdInKernel)
+_TGT_KERNEL_LANGUAGE(block_dim, getNumberOfBlocksInKernel)
+
 #pragma omp end declare target

diff  --git a/openmp/libomptarget/DeviceRTL/src/exports b/openmp/libomptarget/DeviceRTL/src/exports
index 85fd459fee1b1f..2d13195aa7dc87 100644
--- a/openmp/libomptarget/DeviceRTL/src/exports
+++ b/openmp/libomptarget/DeviceRTL/src/exports
@@ -1,4 +1,5 @@
 omp_*
+ompx_*
 *llvm_*
 __kmpc_*
 

diff  --git a/openmp/libomptarget/test/api/ompx_3d.c b/openmp/libomptarget/test/api/ompx_3d.c
new file mode 100644
index 00000000000000..a67ad018358094
--- /dev/null
+++ b/openmp/libomptarget/test/api/ompx_3d.c
@@ -0,0 +1,41 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+#include <omp.h>
+#include <ompx.h>
+#include <stdio.h>
+
+void foo(int device) {
+  int tid = 0, bid = 0, bdim = 0;
+#pragma omp target teams distribute parallel for map(from                      \
+                                                     : tid, bid, bdim)         \
+    device(device) thread_limit(2) num_teams(5)
+  for (int i = 0; i < 1000; ++i) {
+    if (i == 42) {
+      tid = ompx_thread_dim_x();
+      bid = ompx_block_id_x();
+      bdim = ompx_block_dim_x();
+    }
+  }
+  // CHECK: tid: 2, bid: 1, bdim: 5
+  // CHECK: tid: 2, bid: 0, bdim: 1
+  printf("tid: %i, bid: %i, bdim: %i\n", tid, bid, bdim);
+}
+
+int isGPU() { return 0; }
+#pragma omp declare variant(isGPU) match(device = {arch(gpu)})
+int isGPUvariant() { return 1; }
+
+int defaultIsGPU() {
+  int r = 0;
+#pragma omp target map(from : r)
+  r = isGPU();
+  return r;
+}
+
+int main() {
+  if (defaultIsGPU())
+    foo(omp_get_default_device());
+  else
+    printf("tid: 2, bid: 1, bdim: 5\n");
+  foo(omp_get_initial_device());
+}

diff  --git a/openmp/libomptarget/test/api/ompx_3d.cpp b/openmp/libomptarget/test/api/ompx_3d.cpp
new file mode 100644
index 00000000000000..8b2f62239e6a36
--- /dev/null
+++ b/openmp/libomptarget/test/api/ompx_3d.cpp
@@ -0,0 +1,41 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+#include <omp.h>
+#include <ompx.h>
+#include <stdio.h>
+
+void foo(int device) {
+  int tid = 0, bid = 0, bdim = 0;
+#pragma omp target teams distribute parallel for map(from                      \
+                                                     : tid, bid, bdim)         \
+    device(device) thread_limit(2) num_teams(5)
+  for (int i = 0; i < 1000; ++i) {
+    if (i == 42) {
+      tid = ompx::thread_dim_x();
+      bid = ompx::block_id_x();
+      bdim = ompx::block_dim_x();
+    }
+  }
+  // CHECK: tid: 2, bid: 1, bdim: 5
+  // CHECK: tid: 2, bid: 0, bdim: 1
+  printf("tid: %i, bid: %i, bdim: %i\n", tid, bid, bdim);
+}
+
+int isGPU() { return 0; }
+#pragma omp declare variant(isGPU) match(device = {arch(gpu)})
+int isGPUvariant() { return 1; }
+
+int defaultIsGPU() {
+  int r = 0;
+#pragma omp target map(from : r)
+  r = isGPU();
+  return r;
+}
+
+int main() {
+  if (defaultIsGPU())
+    foo(omp_get_default_device());
+  else
+    printf("tid: 2, bid: 1, bdim: 5\n");
+  foo(omp_get_initial_device());
+}

diff  --git a/openmp/runtime/cmake/LibompExports.cmake b/openmp/runtime/cmake/LibompExports.cmake
index 97ecc5d691ff53..dbeb18f358f1ac 100644
--- a/openmp/runtime/cmake/LibompExports.cmake
+++ b/openmp/runtime/cmake/LibompExports.cmake
@@ -50,6 +50,7 @@ set(LIBOMP_EXPORTS_LIB_DIR "${LIBOMP_EXPORTS_DIR}/${libomp_platform}${libomp_suf
 add_custom_command(TARGET omp POST_BUILD
   COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBOMP_EXPORTS_CMN_DIR}
   COMMAND ${CMAKE_COMMAND} -E copy omp.h ${LIBOMP_EXPORTS_CMN_DIR}
+  COMMAND ${CMAKE_COMMAND} -E copy ompx.h ${LIBOMP_EXPORTS_CMN_DIR}
 )
 if(${LIBOMP_OMPT_SUPPORT})
   add_custom_command(TARGET omp POST_BUILD

diff  --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index bb582226451478..8b2445ac58bf0c 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -12,6 +12,7 @@ include(ExtendPath)
 
 # Configure omp.h, kmp_config.h and omp-tools.h if necessary
 configure_file(${LIBOMP_INC_DIR}/omp.h.var omp.h @ONLY)
+configure_file(${LIBOMP_INC_DIR}/ompx.h.var ompx.h @ONLY)
 configure_file(kmp_config.h.cmake kmp_config.h @ONLY)
 if(${LIBOMP_OMPT_SUPPORT})
   configure_file(${LIBOMP_INC_DIR}/omp-tools.h.var omp-tools.h @ONLY)
@@ -393,6 +394,7 @@ endif()
 install(
   FILES
   ${CMAKE_CURRENT_BINARY_DIR}/omp.h
+  ${CMAKE_CURRENT_BINARY_DIR}/ompx.h
   DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH}
 )
 if(${LIBOMP_OMPT_SUPPORT})

diff  --git a/openmp/runtime/src/include/ompx.h.var b/openmp/runtime/src/include/ompx.h.var
new file mode 100644
index 00000000000000..ea17e6e77cfacc
--- /dev/null
+++ b/openmp/runtime/src/include/ompx.h.var
@@ -0,0 +1,110 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __OMPX_H
+#define __OMPX_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int omp_get_ancestor_thread_num(int);
+int omp_get_team_size(int);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// Target kernel language extensions
+///
+/// These extensions exist for the host to allow fallback implementations,
+/// however, they cannot be arbitrarily composed with OpenMP. If the rules of
+/// the kernel language are followed, the host fallbacks should behave as
+/// expected since the kernel is represented as 3 sequential outer loops, one
+/// for each grid dimension, and three (nested) parallel loops, one for each
+/// block dimension. This fallback is not supposed to be optimal and should be
+/// configurable by the user.
+///
+///{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+  ompx_dim_x = 0,
+  ompx_dim_y = 1,
+  ompx_dim_z = 2,
+};
+
+/// ompx_{thread,block}_{id,dim}
+///{
+#pragma omp begin declare variant match(device = {kind(cpu)})
+#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(NAME, VALUE)                     \
+  static inline int ompx_##NAME(int Dim) { return VALUE; }
+
+_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_id,
+                                      omp_get_ancestor_thread_num(Dim + 1))
+_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_dim, omp_get_team_size(Dim + 1))
+_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_id, 0)
+_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_dim, 1)
+#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C
+///}
+
+#pragma omp end declare variant
+
+/// ompx_{thread,block}_{id,dim}_{x,y,z}
+///{
+#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME)                                 \
+  int ompx_##NAME(int Dim);                                                    \
+  static inline int ompx_##NAME##_x() { return ompx_##NAME(ompx_dim_x); }      \
+  static inline int ompx_##NAME##_y() { return ompx_##NAME(ompx_dim_y); }      \
+  static inline int ompx_##NAME##_z() { return ompx_##NAME(ompx_dim_z); }
+
+_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
+_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_dim)
+_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
+_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
+#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
+///}
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+
+namespace ompx {
+
+enum {
+  dim_x = ompx_dim_x,
+  dim_y = ompx_dim_y,
+  dim_z = ompx_dim_z,
+};
+
+/// ompx::{thread,block}_{id,dim}_{,x,y,z}
+///{
+#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME)                          \
+  static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); }        \
+  static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); }           \
+  static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); }           \
+  static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }
+
+_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
+_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_dim)
+_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
+_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
+#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
+///}
+
+} // namespace ompx
+#endif
+
+///}
+
+#endif /* __OMPX_H */


        


More information about the Openmp-commits mailing list