[Openmp-commits] [PATCH] D108398: [libomptarget] Specialize amdgpu devicertl on wave size for gfx10
Jon Chesterfield via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Thu Sep 9 11:31:36 PDT 2021
JonChesterfield updated this revision to Diff 371669.
JonChesterfield added a comment.
- fewer macros
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D108398/new/
https://reviews.llvm.org/D108398
Files:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -49,7 +49,12 @@
// Warp vote function
EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
- return __builtin_amdgcn_read_exec();
+ static_assert(WARPSIZE == 64 || WARPSIZE == 32, "");
+ if (WARPSIZE == 64) {
+ return __builtin_amdgcn_read_exec();
+ } else {
+ return __builtin_amdgcn_read_exec_lo();
+ }
}
static void pteam_mem_barrier(uint32_t num_threads, uint32_t *barrier_state) {
@@ -136,7 +141,12 @@
EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
EXTERN unsigned GetWarpSize() { return WARPSIZE; }
EXTERN unsigned GetLaneId() {
- return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+ static_assert(WARPSIZE == 64 || WARPSIZE == 32, "");
+ if (WARPSIZE == 64) {
+ return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+ } else {
+ return __builtin_amdgcn_mbcnt_lo(~0u, 0u);
+ }
}
EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -25,8 +25,6 @@
#define PRId64 "ld"
#define PRIu64 "lu"
-typedef uint64_t __kmpc_impl_lanemask_t;
-
#define INLINE inline
#define NOINLINE __attribute__((noinline))
#define ALIGN(N) __attribute__((aligned(N)))
@@ -51,6 +49,14 @@
enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size };
enum { WARPSIZE = getGridValue().GV_Warp_Size };
+namespace detail {
+template <unsigned> struct UnsignedToType;
+template <> struct UnsignedToType<64u> { using type = uint64_t; };
+template <> struct UnsignedToType<32u> { using type = uint32_t; };
+} // namespace detail
+
+using __kmpc_impl_lanemask_t = detail::UnsignedToType<WARPSIZE>::type;
+
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#define OMP_STATE_COUNT 32
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D108398.371669.patch
Type: text/x-patch
Size: 2248 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20210909/c1bbc56c/attachment-0001.bin>
More information about the Openmp-commits mailing list