[Openmp-commits] [PATCH] D108398: [libomptarget] Specialize amdgpu devicertl on wave size for gfx10
Jon Chesterfield via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Thu Aug 19 12:00:51 PDT 2021
JonChesterfield created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
JonChesterfield requested review of this revision.
Herald added subscribers: openmp-commits, sstefan1, wdng.
Herald added a reviewer: jdoerfert.
Herald added a project: OpenMP.
Use 32-bit arithmetic instead of relying on LLVM to recognise
that the high half of various uint64_t values is zero for wave32 code.
This is a performance optimisation only. Relies on D108380 <https://reviews.llvm.org/D108380> and D108391 <https://reviews.llvm.org/D108391>.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D108398
Files:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -49,7 +49,13 @@
// Warp vote function
EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
+#if __AMDGCN_WAVEFRONT_SIZE == 64
return __builtin_amdgcn_read_exec();
+#elif __AMDGCN_WAVEFRONT_SIZE == 32
+ return __builtin_amdgcn_read_exec_lo();
+#else
+ #error "Unexpected WAVEFRONT_SIZE"
+#endif
}
static void pteam_mem_barrier(uint32_t num_threads, uint32_t *barrier_state) {
@@ -136,7 +142,13 @@
EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
EXTERN unsigned GetWarpSize() { return WARPSIZE; }
EXTERN unsigned GetLaneId() {
- return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+#if __AMDGCN_WAVEFRONT_SIZE == 64
+ return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+#elif __AMDGCN_WAVEFRONT_SIZE == 32
+ return __builtin_amdgcn_mbcnt_lo(~0u, 0u);
+#else
+ #error "Unexpected WAVEFRONT_SIZE"
+#endif
}
EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -25,8 +25,6 @@
#define PRId64 "ld"
#define PRIu64 "lu"
-typedef uint64_t __kmpc_impl_lanemask_t;
-
#define INLINE inline
#define NOINLINE __attribute__((noinline))
#define ALIGN(N) __attribute__((aligned(N)))
@@ -47,6 +45,14 @@
enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size };
enum { WARPSIZE = getGridValue().GV_Warp_Size };
+namespace detail {
+template <unsigned> struct UnsignedToType;
+template <> struct UnsignedToType<64u> { using type = uint64_t; };
+template <> struct UnsignedToType<32u> { using type = uint32_t; };
+} // namespace detail
+
+using __kmpc_impl_lanemask_t = detail::UnsignedToType<WARPSIZE>::type;
+
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#define OMP_STATE_COUNT 32
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D108398.367579.patch
Type: text/x-patch
Size: 2259 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20210819/5ce850e0/attachment.bin>
More information about the Openmp-commits
mailing list