[libc-commits] [libc] [libc] Add memory fence utility to the GPU utilities (PR #91756)

Joseph Huber via libc-commits libc-commits at lists.llvm.org
Fri May 10 08:20:51 PDT 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/91756

>From 4063b825eb8f44c45890dcd940d14f4117f12ff5 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 10 May 2024 10:17:30 -0500
Subject: [PATCH] [libc] Add memory fence utility to the GPU utilities

Summary:
GPUs like to execute instructions in the background until something
excplitely consumes them. We are working on adding some
microbenchmarking code, which requires flushing the pending memory
operations beforehand. This patch simply adds these utility functions
that will be used in the near future.
---
 libc/src/__support/GPU/amdgpu/utils.h | 6 ++++++
 libc/src/__support/GPU/nvptx/utils.h  | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h
index 9b520a6bcf38d..ff30d06f186b5 100644
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ b/libc/src/__support/GPU/amdgpu/utils.h
@@ -140,6 +140,12 @@ LIBC_INLINE uint32_t get_lane_size() {
   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
 }
 
+/// Waits for all pending memory operations to complete in program order.
+[[clang::convergent]] LIBC_INLINE void memory_fence() {
+  __builtin_amdgcn_s_waitcnt(0);
+  __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
+}
+
 /// Wait for all threads in the wavefront to converge, this is a noop on AMDGPU.
 [[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) {
   __builtin_amdgcn_wave_barrier();
diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
index 3f19afb836486..88b8ee2e31d32 100644
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -118,9 +118,13 @@ LIBC_INLINE uint32_t get_lane_size() { return 32; }
   uint32_t mask = static_cast<uint32_t>(lane_mask);
   return __nvvm_vote_ballot_sync(mask, x);
 }
+
 /// Waits for all the threads in the block to converge and issues a fence.
 [[clang::convergent]] LIBC_INLINE void sync_threads() { __syncthreads(); }
 
+/// Waits for all pending memory operations to complete in program order.
+[[clang::convergent]] LIBC_INLINE void memory_fence() { __nvvm_membar_sys(); }
+
 /// Waits for all threads in the warp to reconverge for independent scheduling.
 [[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t mask) {
   __nvvm_bar_warp_sync(static_cast<uint32_t>(mask));



More information about the libc-commits mailing list