[libc-commits] [libc] fe04baf - [libc] Silence integer shortening warnings on NVPTX masks

Joseph Huber via libc-commits libc-commits at lists.llvm.org
Wed Aug 9 15:18:16 PDT 2023


Author: Joseph Huber
Date: 2023-08-09T17:18:07-05:00
New Revision: fe04baf1f79c52c89808f3ec41743205227c022d

URL: https://github.com/llvm/llvm-project/commit/fe04baf1f79c52c89808f3ec41743205227c022d
DIFF: https://github.com/llvm/llvm-project/commit/fe04baf1f79c52c89808f3ec41743205227c022d.diff

LOG: [libc] Silence integer shortening warnings on NVPTX masks

Nvidia uses a 32-bit mask, but we store it in a common 64-bit integer to
keep the ABI compatible with the AMD implementation, which may use a
64-bit mask. Silence these warnings by explicitly casting to the smaller
type. We know this is always legal, as the result will always fit into
the smaller type if it was generated on NVPTX.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D157548

Added: 
    

Modified: 
    libc/src/__support/GPU/nvptx/utils.h

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
index 5a921ed93a272c..20b247e98605ae 100644
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -115,8 +115,8 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
   // NOTE: This is not sufficient in all cases on Volta hardware or later. The
   // lane mask returned here is not always the true lane mask used by the
   // intrinsics in cases of incedental or enforced divergence by the user.
-  uint64_t lane_mask = get_lane_mask();
-  uint64_t id = __builtin_ffsl(lane_mask) - 1;
+  uint32_t lane_mask = static_cast<uint32_t>(get_lane_mask());
+  uint32_t id = __builtin_ffs(lane_mask) - 1;
 #if __CUDA_ARCH__ >= 600
   return __nvvm_shfl_sync_idx_i32(lane_mask, x, id, get_lane_size() - 1);
 #else
@@ -127,9 +127,9 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
 /// Returns a bitmask of threads in the current lane for which \p x is true.
 [[clang::convergent]] LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
 #if __CUDA_ARCH__ >= 600
-  return __nvvm_vote_ballot_sync(lane_mask, x);
+  return __nvvm_vote_ballot_sync(static_cast<uint32_t>(lane_mask), x);
 #else
-  return lane_mask & __nvvm_vote_ballot(x);
+  return static_cast<uint32_t>(lane_mask) & __nvvm_vote_ballot(x);
 #endif
 }
 /// Waits for all the threads in the block to converge and issues a fence.
@@ -137,7 +137,7 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
 
 /// Waits for all threads in the warp to reconverge for independent scheduling.
 [[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t mask) {
-  __nvvm_bar_warp_sync(mask);
+  __nvvm_bar_warp_sync(static_cast<uint32_t>(mask));
 }
 
 /// Returns the current value of the GPU's processor clock.


        


More information about the libc-commits mailing list