[Openmp-commits] [openmp] 6a82f0f - [libomptarget] Implement wavefront functions for amdgcn

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Tue Feb 4 13:55:42 PST 2020


Author: Jon Chesterfield
Date: 2020-02-04T21:55:29Z
New Revision: 6a82f0f0b9d1f1f0ea1d1614b11e5b11bfcb9870

URL: https://github.com/llvm/llvm-project/commit/6a82f0f0b9d1f1f0ea1d1614b11e5b11bfcb9870
DIFF: https://github.com/llvm/llvm-project/commit/6a82f0f0b9d1f1f0ea1d1614b11e5b11bfcb9870.diff

LOG: [libomptarget] Implement wavefront functions for amdgcn

Summary: [libomptarget] Implement wavefront functions for amdgcn

Reviewers: jdoerfert, ABataev, grokos, arsenm

Reviewed By: arsenm

Subscribers: saiislam, wdng, arsenm, jvesely, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D73077

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index b3c9e53179bc..8977e1698ab5 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -12,6 +12,32 @@
 
 #include "target_impl.h"
 
+// Implementations initially derived from hcc
+
+static DEVICE uint32_t getLaneId(void) { // file-local helper: lane index of the caller
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); // mbcnt_lo result is passed as the base of mbcnt_hi; presumably counts the lanes below the caller — confirm against the AMDGPU mbcnt docs
+}
+
+// Returns the bits of the active-lane mask (__kmpc_impl_activemask) that sit
+// below the calling lane's index, i.e. activemask & ((1 << lane) - 1)
+DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
+  uint32_t lane = getLaneId();
+  int64_t ballot = __kmpc_impl_activemask(); // NOTE(review): signed here, uint64_t in __kmpc_impl_lanemask_gt — confirm intent
+  uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1; // ones in bit positions [0, lane)
+  return mask & ballot;
+}
+
+// Returns the bits of the active-lane mask (__kmpc_impl_activemask) that sit
+// above the calling lane's index; zero for the last lane (WARPSIZE - 1)
+DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
+  uint32_t lane = getLaneId();
+  if (lane == (WARPSIZE - 1)) // the last lane has no lanes above it
+    return 0; // also avoids shifting by lane + 1 == WARPSIZE below (presumably 64, the full mask width — confirm)
+  uint64_t ballot = __kmpc_impl_activemask();
+  uint64_t mask = (~((uint64_t)0)) << (lane + 1); // ones in bit positions (lane, 63]
+  return mask & ballot;
+}
+
 DEVICE double __kmpc_impl_get_wtick() { return ((double)1E-9); }
 
 EXTERN uint64_t __clock64();
@@ -19,6 +45,28 @@ DEVICE double __kmpc_impl_get_wtime() {
   return ((double)1.0 / 745000000.0) * __clock64();
 }
 
+// Warp vote function: returns the lane mask produced by comparing 1 != 0
+DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
+  // 33 is ICMP_NE from llvm/include/llvm/IR/InstrTypes.h
+  return __builtin_amdgcn_uicmp(1, 0, 33); // presumably yields one result bit per executing lane — confirm against the builtin's docs
+}
+
+DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t var,
+                                     int32_t srcLane) { // presumably returns the value of var held by lane srcLane; the unnamed mask argument is ignored
+  int width = WARPSIZE;
+  int self = getLaneId();
+  int index = srcLane + (self & ~(width - 1)); // srcLane offset from self rounded down to a multiple of width (assumes width is a power of two)
+  return __builtin_amdgcn_ds_bpermute(index << 2, var); // index << 2 scales the lane index by 4 — presumably a byte address; confirm
+}
+
+DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var,
+                                          uint32_t laneDelta, int32_t width) { // the unnamed mask argument is ignored
+  int self = getLaneId();
+  int index = self + laneDelta; // candidate source lane: laneDelta above the caller
+  index = (int)(laneDelta + (self & (width - 1))) >= width ? self : index; // use the caller's own lane when the source would fall outside its width-sized group (assumes width is a power of two)
+  return __builtin_amdgcn_ds_bpermute(index << 2, var); // index << 2 scales the lane index by 4 — presumably a byte address; confirm
+}
+
 EXTERN uint64_t __ockl_get_local_size(uint32_t);
 EXTERN uint64_t __ockl_get_num_groups(uint32_t);
 DEVICE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }


        


More information about the Openmp-commits mailing list