[Openmp-commits] [openmp] 3d3e407 - [libomptarget][nfc] Move omp locks under target_impl

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Tue Dec 17 04:19:12 PST 2019


Author: JonChesterfield
Date: 2019-12-17T12:18:57Z
New Revision: 3d3e4076cd65007007ca639d4f99c0fa671c9f8e

URL: https://github.com/llvm/llvm-project/commit/3d3e4076cd65007007ca639d4f99c0fa671c9f8e
DIFF: https://github.com/llvm/llvm-project/commit/3d3e4076cd65007007ca639d4f99c0fa671c9f8e.diff

LOG: [libomptarget][nfc] Move omp locks under target_impl

Summary:
[libomptarget][nfc] Move omp locks under target_impl

These are likely to be target specific, even down to the lock_t which is
correspondingly moved out of interface.h. The alternative is to include
interface.h in target_impl which substantiatially increases the scope of
those symbols.

The current nvptx implementation deadlocks on amdgcn. The preferred
implementation for that arch is still under discussion - this change
leaves declarations in target_impl.

The functions could be inline for nvptx. I'd prefer to keep the internals
hidden in the target_impl translation unit, but will add the (possibly renamed)
macros to target_impl.h if preferred.

Reviewers: ABataev, jdoerfert, grokos

Reviewed By: jdoerfert

Subscribers: jvesely, mgorny, jfb, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D71574

Added: 
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu

Modified: 
    openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
    openmp/libomptarget/deviceRTLs/interface.h
    openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
    openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
index 6fba1137d07b..f7c75c09362a 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
@@ -13,5 +13,6 @@
 
 #define EXTERN extern "C" __attribute__((device))
 typedef uint64_t __kmpc_impl_lanemask_t;
+typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
 
 #endif

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index 3c4e1f38bde2..713a880d9a5c 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -155,6 +155,13 @@ INLINE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
 INLINE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
 INLINE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
 
+// Locks
+EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock);
+EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock);
+
 // DEVICE versions of part of libc
 extern "C" {
 DEVICE __attribute__((noreturn)) void

diff  --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h
index 1516c21f3d4d..81e67184218f 100644
--- a/openmp/libomptarget/deviceRTLs/interface.h
+++ b/openmp/libomptarget/deviceRTLs/interface.h
@@ -30,7 +30,6 @@
 // OpenMP interface
 ////////////////////////////////////////////////////////////////////////////////
 
-typedef uint32_t omp_lock_t;      /* arbitrary type of the right length */
 typedef uint64_t omp_nest_lock_t; /* arbitrary type of the right length */
 
 typedef enum omp_sched_t {

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index b03c6c5c3f76..83308be39e5d 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -55,6 +55,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
       ${devicertl_common_directory}/src/critical.cu
       src/data_sharing.cu
       src/libcall.cu
+      src/target_impl.cu
       ${devicertl_common_directory}/src/loop.cu
       ${devicertl_common_directory}/src/omptarget.cu
       ${devicertl_common_directory}/src/parallel.cu

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index a43f0c6d8d99..baee6d1f45ce 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -336,54 +336,30 @@ EXTERN int omp_get_max_task_priority(void) {
 // locks
 ////////////////////////////////////////////////////////////////////////////////
 
-#define __OMP_SPIN 1000
-#define UNSET 0
-#define SET 1
-
 EXTERN void omp_init_lock(omp_lock_t *lock) {
-  omp_unset_lock(lock);
+  __kmpc_impl_init_lock(lock);
   PRINT0(LD_IO, "call omp_init_lock()\n");
 }
 
 EXTERN void omp_destroy_lock(omp_lock_t *lock) {
-  omp_unset_lock(lock);
+  __kmpc_impl_destroy_lock(lock);
   PRINT0(LD_IO, "call omp_destroy_lock()\n");
 }
 
 EXTERN void omp_set_lock(omp_lock_t *lock) {
-  // int atomicCAS(int* address, int compare, int val);
-  // (old == compare ? val : old)
-
-  // TODO: not sure spinning is a good idea here..
-  while (atomicCAS(lock, UNSET, SET) != UNSET) {
-    clock_t start = clock();
-    clock_t now;
-    for (;;) {
-      now = clock();
-      clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
-      if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
-        break;
-      }
-    }
-  } // wait for 0 to be the read value
-
+  __kmpc_impl_set_lock(lock);
   PRINT0(LD_IO, "call omp_set_lock()\n");
 }
 
 EXTERN void omp_unset_lock(omp_lock_t *lock) {
-  (void)atomicExch(lock, UNSET);
-
+  __kmpc_impl_unset_lock(lock);
   PRINT0(LD_IO, "call omp_unset_lock()\n");
 }
 
 EXTERN int omp_test_lock(omp_lock_t *lock) {
-  // int atomicCAS(int* address, int compare, int val);
-  // (old == compare ? val : old)
-  int ret = atomicAdd(lock, 0);
-
+  int rc = __kmpc_impl_test_lock(lock);
   PRINT(LD_IO, "call omp_test_lock() return %d\n", ret);
-
-  return ret;
+  return rc;
 }
 
 // for xlf Fotran

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h b/openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h
index 7c9e471e49a6..c5e91c5bf527 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h
@@ -13,5 +13,6 @@
 
 #define EXTERN extern "C" __device__
 typedef uint32_t __kmpc_impl_lanemask_t;
+typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
 
 #endif

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
new file mode 100644
index 000000000000..11f60e65173a
--- /dev/null
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -0,0 +1,54 @@
+//===---------- target_impl.cu - NVPTX OpenMP GPU options ------- CUDA -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Definitions of target specific functions
+//
+//===----------------------------------------------------------------------===//
+
+#include "target_impl.h"
+#include "common/debug.h"
+
+#define __OMP_SPIN 1000
+#define UNSET 0
+#define SET 1
+
+EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock) {
+  omp_unset_lock(lock);
+}
+
+EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock) {
+  omp_unset_lock(lock);
+}
+
+EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) {
+  // int atomicCAS(int* address, int compare, int val);
+  // (old == compare ? val : old)
+
+  // TODO: not sure spinning is a good idea here..
+  while (atomicCAS(lock, UNSET, SET) != UNSET) {
+    clock_t start = clock();
+    clock_t now;
+    for (;;) {
+      now = clock();
+      clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
+      if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
+        break;
+      }
+    }
+  } // wait for 0 to be the read value
+}
+
+EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock) {
+  (void)atomicExch(lock, UNSET);
+}
+
+EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock) {
+  // int atomicCAS(int* address, int compare, int val);
+  // (old == compare ? val : old)
+  return atomicAdd(lock, 0);
+}

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 7a85e744f9d4..350d2cf5f2e1 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -188,4 +188,11 @@ INLINE int GetBlockIdInKernel() { return blockIdx.x; }
 INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
 INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
 
+// Locks
+EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock);
+EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock);
+
 #endif


        


More information about the Openmp-commits mailing list