[Openmp-commits] [openmp] 6f04add - [libomptarget][amdgcn] Build amdgcn devicertl as openmp

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Fri Feb 12 01:51:32 PST 2021


Author: Jon Chesterfield
Date: 2021-02-12T09:51:21Z
New Revision: 6f04addc8b2eee0d88b97facfa4fb7424b4b74bd

URL: https://github.com/llvm/llvm-project/commit/6f04addc8b2eee0d88b97facfa4fb7424b4b74bd
DIFF: https://github.com/llvm/llvm-project/commit/6f04addc8b2eee0d88b97facfa4fb7424b4b74bd.diff

LOG: [libomptarget][amdgcn] Build amdgcn devicertl as openmp

[libomptarget][amdgcn] Build amdgcn devicertl as openmp

Change cmake to build as openmp and fix up some minor errors in the code.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D96533

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
    openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
    openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip
    openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_smid.hip
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
    openmp/libomptarget/deviceRTLs/common/omptarget.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 8d9abe5d0bbd..0e890b194d0f 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -90,22 +90,26 @@ else()
 endif()
 
 # create libraries
-set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900)
+set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx906)
 if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
   set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
 endif()
 
 macro(add_cuda_bc_library)
   set(cu_cmd ${AOMP_BINDIR}/clang++
+    -xc++
+    -c
     -std=c++14
-    -fcuda-rdc
+    -target amdgcn
+    -emit-llvm
+    -Xclang -aux-triple -Xclang x86_64-unknown-linux-gnu # see nvptx
+    -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
+    -D__AMDGCN__
+    -Xclang -target-cpu -Xclang ${mcpu}
     -fvisibility=default
-    --cuda-device-only
     -Wno-unused-value
-    -x hip
-    -nogpulib -nogpuinc
+    -nogpulib
     -O${optimization_level}
-    --cuda-gpu-arch=${mcpu}
     ${CUDA_DEBUG}
     -I${CMAKE_CURRENT_SOURCE_DIR}/src
     -I${devicertl_base_directory})

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
index 80409d611f6f..1e653dad581e 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
@@ -11,7 +11,7 @@
 
 #include <stdint.h>
 
-#define EXTERN extern "C" __attribute__((device))
+#define EXTERN extern "C"
 typedef uint64_t __kmpc_impl_lanemask_t;
 typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
 

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip
index f537fb28318c..77746fa65bc4 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip
@@ -26,6 +26,6 @@ DEVICE void __kmpc_impl_init_lock(omp_lock_t *) { warn(); }
 DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *) { warn(); }
 DEVICE void __kmpc_impl_set_lock(omp_lock_t *) { warn(); }
 DEVICE void __kmpc_impl_unset_lock(omp_lock_t *) { warn(); }
-DEVICE int __kmpc_impl_test_lock(omp_lock_t *lock) { warn(); }
+DEVICE int __kmpc_impl_test_lock(omp_lock_t *lock) { warn(); return 0;}
 
 #pragma omp end declare target

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_smid.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_smid.hip
index c85045570de2..f18f8b5a70c8 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_smid.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_smid.hip
@@ -53,7 +53,7 @@ enum {
 // bound on how many compute units are available. Some values in this
 // range may never be returned if there are fewer than 2^CU_ID_SIZE CUs.
 
-DEVICE uint32_t __kmpc_impl_smid() {
+EXTERN uint32_t __kmpc_impl_smid() {
   uint32_t cu_id = __builtin_amdgcn_s_getreg(
       ENCODE_HWREG(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID));
   uint32_t se_id = __builtin_amdgcn_s_getreg(

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index fb41332631bc..6a40c18728bf 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -22,11 +22,9 @@
 #include <stddef.h>
 #include <stdint.h>
 
-#define DEVICE __attribute__((device))
+#define DEVICE
 #define INLINE inline DEVICE
 #define NOINLINE __attribute__((noinline)) DEVICE
-#define SHARED(NAME) __attribute__((shared)) NAME
-#define EXTERN_SHARED(NAME) __attribute__((shared)) NAME
 #define ALIGN(N) __attribute__((aligned(N)))
 
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index cf04b483407c..51eaf710d00f 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -11,7 +11,9 @@
 //===----------------------------------------------------------------------===//
 #pragma omp declare target
 
+#include "common/omptarget.h"
 #include "target_impl.h"
+#include "target_interface.h"
 
 // Implementations initially derived from hcc
 
@@ -66,11 +68,12 @@ EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var,
   return __builtin_amdgcn_ds_bpermute(index << 2, var);
 }
 
-static DEVICE SHARED uint32_t L1_Barrier;
+uint32_t __kmpc_L1_Barrier [[clang::loader_uninitialized]];
+#pragma allocate(__kmpc_L1_Barrier) allocator(omp_pteam_mem_alloc)
 
 EXTERN void __kmpc_impl_target_init() {
   // Don't have global ctors, and shared memory is not zero init
-  __atomic_store_n(&L1_Barrier, 0u, __ATOMIC_RELEASE);
+  __atomic_store_n(&__kmpc_L1_Barrier, 0u, __ATOMIC_RELEASE);
 }
 
 EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) {
@@ -94,8 +97,8 @@ EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) {
   bool isLowest = GetLaneId() == lowestActiveThread;
 
   if (isLowest) {
-    uint32_t load =
-        __atomic_fetch_add(&L1_Barrier, 1, __ATOMIC_RELAXED); // commutative
+    uint32_t load = __atomic_fetch_add(&__kmpc_L1_Barrier, 1,
+                                       __ATOMIC_RELAXED); // commutative
 
     // Record the number of times the barrier has been passed
     uint32_t generation = load & 0xffff0000u;
@@ -107,12 +110,12 @@ EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) {
       load &= 0xffff0000u; // because bits zeroed second
 
       // Reset the wave counter and release the waiting waves
-      __atomic_store_n(&L1_Barrier, load, __ATOMIC_RELAXED);
+      __atomic_store_n(&__kmpc_L1_Barrier, load, __ATOMIC_RELAXED);
     } else {
       // more waves still to go, spin until generation counter changes
       do {
         __builtin_amdgcn_s_sleep(0);
-        load = __atomic_load_n(&L1_Barrier, __ATOMIC_RELAXED);
+        load = __atomic_load_n(&__kmpc_L1_Barrier, __ATOMIC_RELAXED);
       } while ((load & 0xffff0000u) == generation);
     }
   }

diff  --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
index 76922333c849..e179ca5271fa 100644
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -337,8 +337,9 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
 ////////////////////////////////////////////////////////////////////////////////
 
 INLINE uint32_t __kmpc_impl_ffs(uint32_t x) { return __builtin_ffs(x); }
-
 INLINE uint32_t __kmpc_impl_popc(uint32_t x) { return __builtin_popcount(x); }
+INLINE uint32_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
+INLINE uint32_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
 
 #include "common/omptargeti.h"
 


        


More information about the Openmp-commits mailing list