[Openmp-commits] [PATCH] D108398: [libomptarget] Specialize amdgpu devicertl on wave size for gfx10

Jon Chesterfield via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Thu Sep 9 11:31:36 PDT 2021


JonChesterfield updated this revision to Diff 371669.
JonChesterfield added a comment.

- fewer macros


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108398/new/

https://reviews.llvm.org/D108398

Files:
  openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
  openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip


Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -49,7 +49,12 @@
 
 // Warp vote function
 EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
-  return __builtin_amdgcn_read_exec();
+  static_assert(WARPSIZE == 64 || WARPSIZE == 32, "");
+  if (WARPSIZE == 64) {
+    return __builtin_amdgcn_read_exec();
+  } else {
+    return __builtin_amdgcn_read_exec_lo();
+  }
 }
 
 static void pteam_mem_barrier(uint32_t num_threads, uint32_t *barrier_state) {
@@ -136,7 +141,12 @@
 EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
 EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+  static_assert(WARPSIZE == 64 || WARPSIZE == 32, "");
+  if (WARPSIZE == 64) {
+    return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+  } else {
+    return __builtin_amdgcn_mbcnt_lo(~0u, 0u);
+  }
 }
 
 EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -25,8 +25,6 @@
 #define PRId64 "ld"
 #define PRIu64 "lu"
 
-typedef uint64_t __kmpc_impl_lanemask_t;
-
 #define INLINE inline
 #define NOINLINE __attribute__((noinline))
 #define ALIGN(N) __attribute__((aligned(N)))
@@ -51,6 +49,14 @@
 enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size };
 enum { WARPSIZE = getGridValue().GV_Warp_Size };
 
+namespace detail {
+template <unsigned> struct UnsignedToType;
+template <> struct UnsignedToType<64u> { using type = uint64_t; };
+template <> struct UnsignedToType<32u> { using type = uint32_t; };
+} // namespace detail
+
+using __kmpc_impl_lanemask_t = detail::UnsignedToType<WARPSIZE>::type;
+
 // Maximum number of omp state objects per SM allocated statically in global
 // memory.
 #define OMP_STATE_COUNT 32


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D108398.371669.patch
Type: text/x-patch
Size: 2248 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20210909/c1bbc56c/attachment-0001.bin>


More information about the Openmp-commits mailing list