[Openmp-commits] [PATCH] D108398: [libomptarget] Specialize amdgpu devicertl on wave size for gfx10

Thu Aug 19 12:00:51 PDT 2021

JonChesterfield created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
JonChesterfield requested review of this revision.
Herald added subscribers: openmp-commits, sstefan1, wdng.
Herald added a reviewer: jdoerfert.
Herald added a project: OpenMP.

For wave32 code, use 32-bit arithmetic instead of relying on LLVM to recognise
that the high half of various uint64_t values is zero.

This is a performance optimisation only. It relies on D108380 <https://reviews.llvm.org/D108380> and D108391 <https://reviews.llvm.org/D108391>.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D108398

Files:
  openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
  openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip


Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
===================================================================

--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -49,7 +49,13 @@
 
 // Warp vote function
 EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
+#if __AMDGCN_WAVEFRONT_SIZE == 64
   return __builtin_amdgcn_read_exec();
+#elif __AMDGCN_WAVEFRONT_SIZE == 32
+  return __builtin_amdgcn_read_exec_lo();
+#else
+  #error "Unexpected WAVEFRONT_SIZE"
+#endif
 }
 
 static void pteam_mem_barrier(uint32_t num_threads, uint32_t *barrier_state) {
@@ -136,7 +142,13 @@
 EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
 EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+#if __AMDGCN_WAVEFRONT_SIZE == 64
+    return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+#elif __AMDGCN_WAVEFRONT_SIZE == 32
+    return  __builtin_amdgcn_mbcnt_lo(~0u, 0u);
+#else
+  #error "Unexpected WAVEFRONT_SIZE"
+#endif
 }
 
 EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -25,8 +25,6 @@
 #define PRId64 "ld"
 #define PRIu64 "lu"
 
-typedef uint64_t __kmpc_impl_lanemask_t;
-
 #define INLINE inline
 #define NOINLINE __attribute__((noinline))
 #define ALIGN(N) __attribute__((aligned(N)))
@@ -47,6 +45,14 @@
 enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size };
 enum { WARPSIZE = getGridValue().GV_Warp_Size };
 
+namespace detail {
+template <unsigned> struct UnsignedToType;
+template <> struct UnsignedToType<64u> { using type = uint64_t; };
+template <> struct UnsignedToType<32u> { using type = uint32_t; };
+} // namespace detail
+
+using __kmpc_impl_lanemask_t = detail::UnsignedToType<WARPSIZE>::type;
+
 // Maximum number of omp state objects per SM allocated statically in global
 // memory.
 #define OMP_STATE_COUNT 32


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D108398.367579.patch
Type: text/x-patch
Size: 2259 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20210819/5ce850e0/attachment.bin>