[clang] 17d1523 - [Clang] Minor fixes to 'gpuintrin.h' header
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 28 10:07:09 PST 2025
Author: Joseph Huber
Date: 2025-01-28T12:07:02-06:00
New Revision: 17d1523207c6d5fb6b1b47ccf0406a0bb58cb38d
URL: https://github.com/llvm/llvm-project/commit/17d1523207c6d5fb6b1b47ccf0406a0bb58cb38d
DIFF: https://github.com/llvm/llvm-project/commit/17d1523207c6d5fb6b1b47ccf0406a0bb58cb38d.diff
LOG: [Clang] Minor fixes to 'gpuintrin.h' header
Summary:
The bitmask applied to the NVPTX shuffle result gives different results
from the AMDGPU implementation, so it is removed. Also fix some comments
and casts.
Added:
Modified:
clang/lib/Headers/amdgpuintrin.h
clang/lib/Headers/nvptxintrin.h
Removed:
################################################################################
diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h
index 720674a85f52cf..038605605462f8 100644
--- a/clang/lib/Headers/amdgpuintrin.h
+++ b/clang/lib/Headers/amdgpuintrin.h
@@ -158,16 +158,16 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x) {
((uint64_t)__builtin_amdgcn_ds_bpermute(__idx << 2, __lo));
}
-// Returns true if the flat pointer points to CUDA 'shared' memory.
+// Returns true if the flat pointer points to AMDGPU 'shared' memory.
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) {
- return __builtin_amdgcn_is_shared((void __attribute__((address_space(0))) *)((
+ return __builtin_amdgcn_is_shared((void [[clang::address_space(0)]] *)((
void [[clang::opencl_generic]] *)ptr));
}
-// Returns true if the flat pointer points to CUDA 'local' memory.
+// Returns true if the flat pointer points to AMDGPU 'private' memory.
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_private(void *ptr) {
- return __builtin_amdgcn_is_private((void __attribute__((
- address_space(0))) *)((void [[clang::opencl_generic]] *)ptr));
+ return __builtin_amdgcn_is_private((void [[clang::address_space(0)]] *)((
+ void [[clang::opencl_generic]] *)ptr));
}
// Terminates execution of the associated wavefront.
diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h
index 962dca9cf03126..fb2864eab6a09d 100644
--- a/clang/lib/Headers/nvptxintrin.h
+++ b/clang/lib/Headers/nvptxintrin.h
@@ -151,9 +151,7 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
_DEFAULT_FN_ATTRS static __inline__ uint32_t
__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x) {
uint32_t __mask = (uint32_t)__lane_mask;
- uint32_t __bitmask = (__mask >> __idx) & 1u;
- return -__bitmask &
- __nvvm_shfl_sync_idx_i32(__mask, __x, __idx, __gpu_num_lanes() - 1u);
+ return __nvvm_shfl_sync_idx_i32(__mask, __x, __idx, __gpu_num_lanes() - 1u);
}
// Shuffles the the lanes inside the warp according to the given index.
@@ -162,10 +160,9 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x) {
uint32_t __hi = (uint32_t)(__x >> 32ull);
uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
uint32_t __mask = (uint32_t)__lane_mask;
- uint64_t __bitmask = (__mask >> __idx) & 1u;
- return -__bitmask & ((uint64_t)__nvvm_shfl_sync_idx_i32(
- __mask, __hi, __idx, __gpu_num_lanes() - 1u)
- << 32ull) |
+ return ((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __hi, __idx,
+ __gpu_num_lanes() - 1u)
+ << 32ull) |
((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __lo, __idx,
__gpu_num_lanes() - 1u));
}
More information about the cfe-commits mailing list