[libc-commits] [libc] d29bb70 - [libc][NFC] Remove inline assembly for PTX instructions (#79913)

via libc-commits libc-commits at lists.llvm.org
Mon Jan 29 16:18:08 PST 2024


Author: Joseph Huber
Date: 2024-01-29T18:18:04-06:00
New Revision: d29bb704cb646f9a69546e3517f12b022528d2d2

URL: https://github.com/llvm/llvm-project/commit/d29bb704cb646f9a69546e3517f12b022528d2d2
DIFF: https://github.com/llvm/llvm-project/commit/d29bb704cb646f9a69546e3517f12b022528d2d2.diff

LOG: [libc][NFC] Remove inline assembly for PTX instructions (#79913)

Summary:
Recent patches have implemented builtin versions of these functions.
This patch simply removes uses of inline assembly to hopefully improve
optimizations in this area.

Added: 
    

Modified: 
    libc/src/__support/GPU/nvptx/utils.h
    libc/src/__support/RPC/rpc_util.h
    libc/src/time/gpu/nanosleep.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
index 1519f36850a63c..9fe3caa4914754 100644
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -105,9 +105,7 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
 
 /// Returns the bit-mask of active threads in the current warp.
 [[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() {
-  uint32_t mask;
-  LIBC_INLINE_ASM("activemask.b32 %0;" : "=r"(mask));
-  return mask;
+  return __nvvm_activemask();
 }
 
 /// Copies the value from the first active thread in the warp to the rest.
@@ -141,23 +139,16 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
 
 /// Returns the current value of the GPU's processor clock.
 LIBC_INLINE uint64_t processor_clock() {
-  uint64_t timestamp;
-  LIBC_INLINE_ASM("mov.u64  %0, %%clock64;" : "=l"(timestamp));
-  return timestamp;
+  return __nvvm_read_ptx_sreg_clock64();
 }
 
 /// Returns a global fixed-frequency timer at nanosecond frequency.
 LIBC_INLINE uint64_t fixed_frequency_clock() {
-  uint64_t nsecs;
-  LIBC_INLINE_ASM("mov.u64  %0, %%globaltimer;" : "=l"(nsecs));
-  return nsecs;
+  return __nvvm_read_ptx_sreg_globaltimer();
 }
 
 /// Terminates execution of the calling thread.
-[[noreturn]] LIBC_INLINE void end_program() {
-  LIBC_INLINE_ASM("exit;" ::: "memory");
-  __builtin_unreachable();
-}
+[[noreturn]] LIBC_INLINE void end_program() { __nvvm_exit(); }
 
 } // namespace gpu
 } // namespace LIBC_NAMESPACE

diff  --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h
index 04620b0487f4ad..ff9569298a1ed7 100644
--- a/libc/src/__support/RPC/rpc_util.h
+++ b/libc/src/__support/RPC/rpc_util.h
@@ -22,7 +22,7 @@ namespace rpc {
 /// Suspend the thread briefly to assist the thread scheduler during busy loops.
 LIBC_INLINE void sleep_briefly() {
 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
-  LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");
+  __nvvm_nanosleep(64);
 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
   __builtin_amdgcn_s_sleep(2);
 #elif defined(LIBC_TARGET_ARCH_IS_X86)

diff  --git a/libc/src/time/gpu/nanosleep.cpp b/libc/src/time/gpu/nanosleep.cpp
index a0c735502ff589..e84fe622100e80 100644
--- a/libc/src/time/gpu/nanosleep.cpp
+++ b/libc/src/time/gpu/nanosleep.cpp
@@ -29,7 +29,7 @@ LLVM_LIBC_FUNCTION(int, nanosleep,
   // slept will be somewhere between zero and twice the requested amount. Here
   // we will sleep again if we undershot the time.
   while (cur < end) {
-    LIBC_INLINE_ASM("nanosleep.u32 %0;" ::"r"(nsecs));
+    __nvvm_nanosleep(static_cast<uint32_t>(nsecs));
     cur = gpu::fixed_frequency_clock();
     nsecs -= nsecs > cur - start ? cur - start : 0;
   }


        


More information about the libc-commits mailing list