[llvm] [OpenMP] Replace most GPU helpers with ones from <gpuintrin.h> (PR #125771)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 6 21:24:16 PST 2025
================
@@ -15,116 +15,48 @@
#include "Interface.h"
#include "Mapping.h"
-using namespace ompx;
-
-namespace impl {
-
-void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
- static_assert(sizeof(unsigned long) == 8, "");
- *LowBits = static_cast<uint32_t>(Val & 0x00000000FFFFFFFFUL);
- *HighBits = static_cast<uint32_t>((Val & 0xFFFFFFFF00000000UL) >> 32);
-}
-
-uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
- return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits;
-}
-
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
-int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
- int32_t Width);
-
-uint64_t ballotSync(uint64_t Mask, int32_t Pred);
-
-/// AMDGCN Implementation
-///
-///{
-#ifdef __AMDGPU__
-
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
- int Self = mapping::getThreadIdInWarp();
- int Index = SrcLane + (Self & ~(Width - 1));
- return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
-}
-
-int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
- int32_t Width) {
- int Self = mapping::getThreadIdInWarp();
- int Index = Self + LaneDelta;
- Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index;
- return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
-}
+#include <gpuintrin.h>
-uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
- return Mask & __builtin_amdgcn_ballot_w64(Pred);
-}
-
-bool isSharedMemPtr(const void *Ptr) {
- return __builtin_amdgcn_is_shared(
- (const __attribute__((address_space(0))) void *)Ptr);
-}
-#endif
-///}
-
-/// NVPTX Implementation
-///
-///{
-#ifdef __NVPTX__
-
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
- return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, Width - 1);
-}
-
-int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
- int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f;
- return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
-}
-
-uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
- return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred);
-}
-
-bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
-
-#endif
-///}
-} // namespace impl
+using namespace ompx;
uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) {
- return impl::Pack(LowBits, HighBits);
+ return (uint64_t(HighBits) << 32) | uint64_t(LowBits);
}
void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
- impl::Unpack(Val, &LowBits, &HighBits);
+ static_assert(sizeof(unsigned long) == 8, "");
----------------
shiltian wrote:
Does it really matter for literal values?
https://github.com/llvm/llvm-project/pull/125771
More information about the llvm-commits mailing list