[libc-commits] [libc] [libc] add a simple TTAS spin lock (PR #98846)
Schrodinger ZHU Yifan via libc-commits
libc-commits at lists.llvm.org
Mon Jul 15 20:14:37 PDT 2024
https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/98846
>From 92579a710c922ec47bb1b362f1867d92fe62f268 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sun, 14 Jul 2024 17:01:59 -0700
Subject: [PATCH 1/2] [libc] add a simple TTAS spin lock
---
libc/src/__support/threads/CMakeLists.txt | 9 ++++
libc/src/__support/threads/spin_lock.h | 56 +++++++++++++++++++++++
2 files changed, 65 insertions(+)
create mode 100644 libc/src/__support/threads/spin_lock.h
diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt
index 9ea0b59befe7a..d2e46b8e2574e 100644
--- a/libc/src/__support/threads/CMakeLists.txt
+++ b/libc/src/__support/threads/CMakeLists.txt
@@ -10,6 +10,15 @@ add_header_library(
sleep.h
)
+add_header_library(
+ spin_lock
+ HDRS
+ spin_lock.h
+ DEPENDS
+ .sleep
+ libc.src.__support.CPP.atomic
+)
+
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
add_subdirectory(${LIBC_TARGET_OS})
endif()
diff --git a/libc/src/__support/threads/spin_lock.h b/libc/src/__support/threads/spin_lock.h
new file mode 100644
index 0000000000000..377c27de3f6c5
--- /dev/null
+++ b/libc/src/__support/threads/spin_lock.h
@@ -0,0 +1,56 @@
+//===-- TTAS Spin Lock ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H
+#define LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H
+
+#include "src/__support/CPP/atomic.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/threads/sleep.h"
+namespace LIBC_NAMESPACE_DECL {
+class SpinLock {
+  cpp::Atomic<bool> flag;
+
+public:
+  LIBC_INLINE constexpr SpinLock() : flag{false} {}
+  LIBC_INLINE bool try_lock() {
+    return !flag.exchange(true, cpp::MemoryOrder::ACQUIRE);
+  }
+  LIBC_INLINE void lock() {
+    // clang-format off
+    // this compiles to the following on armv9a and x86_64:
+    // mov w8, #1 | .LBB0_1:
+    // .LBB0_1: | mov al, 1
+    // swpab w8, w9, [x0] | xchg byte ptr [rdi], al
+    // tbnz w9, #0, .LBB0_3 | test al, 1
+    // b .LBB0_4 | jne .LBB0_3
+    // .LBB0_2: | jmp .LBB0_4
+    // isb | .LBB0_2:
+    // .LBB0_3: | pause
+    // ldrb w9, [x0] | .LBB0_3:
+    // tbnz w9, #0, .LBB0_2 | movzx eax, byte ptr [rdi]
+    // b .LBB0_1 | test al, 1
+    // .LBB0_4: | jne .LBB0_2
+    // ret | jmp .LBB0_1
+    // | .LBB0_4:
+    // | ret
+    // clang-format on
+    // Notice that inside the busy loop .LBB0_2 and .LBB0_3, only instructions
+    // with load semantics are used. swpab/xchg is only issued in outer loop
+    // .LBB0_1. This is useful to avoid extra write traffic. The cache
+    // coherence guarantees "write propagation", so even if the inner loop only
+    // reads with relaxed ordering, the thread will eventually see the write.
+    while (!try_lock())
+      while (flag.load(cpp::MemoryOrder::RELAXED))
+        sleep_briefly();
+  }
+  LIBC_INLINE void unlock() { flag.store(false, cpp::MemoryOrder::RELEASE); }
+};
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H
>From 7a90921b85a0e3d17ed12df42272f7a0cf2107ff Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 15 Jul 2024 20:14:10 -0700
Subject: [PATCH 2/2] [libc] address CR
---
libc/src/__support/threads/spin_lock.h | 33 +++++++++++++++++++++-----
1 file changed, 27 insertions(+), 6 deletions(-)
diff --git a/libc/src/__support/threads/spin_lock.h b/libc/src/__support/threads/spin_lock.h
index 377c27de3f6c5..3385809867866 100644
--- a/libc/src/__support/threads/spin_lock.h
+++ b/libc/src/__support/threads/spin_lock.h
@@ -11,19 +11,26 @@
#include "src/__support/CPP/atomic.h"
#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/properties/architectures.h"
#include "src/__support/threads/sleep.h"
namespace LIBC_NAMESPACE_DECL {
-class SpinLock {
- cpp::Atomic<bool> flag;
+
+template <typename LockWord, typename Return>
+using AtomicOp = Return (cpp::Atomic<LockWord>::*)(LockWord, cpp::MemoryOrder,
+ cpp::MemoryScope);
+template <typename LockWord, AtomicOp<LockWord, LockWord> Acquire,
+ AtomicOp<LockWord, void> Release>
+class SpinLockAdaptor {
+ cpp::Atomic<LockWord> flag;
public:
- LIBC_INLINE constexpr SpinLock() : flag{false} {}
+ LIBC_INLINE constexpr SpinLockAdaptor() : flag{false} {}
LIBC_INLINE bool try_lock() {
- return !flag.exchange(true, cpp::MemoryOrder::ACQUIRE);
+    return !(flag.*Acquire)(static_cast<LockWord>(1), cpp::MemoryOrder::ACQUIRE, cpp::MemoryScope::DEVICE);
}
LIBC_INLINE void lock() {
// clang-format off
- // this compiles to the following on armv9a and x86_64:
+ // For normal TTAS, this compiles to the following on armv9a and x86_64:
// mov w8, #1 | .LBB0_1:
// .LBB0_1: | mov al, 1
// swpab w8, w9, [x0] | xchg byte ptr [rdi], al
@@ -49,8 +56,22 @@ class SpinLock {
while (flag.load(cpp::MemoryOrder::RELAXED))
sleep_briefly();
}
- LIBC_INLINE void unlock() { flag.store(false, cpp::MemoryOrder::RELEASE); }
+  LIBC_INLINE void unlock() {
+    (flag.*Release)(static_cast<LockWord>(0), cpp::MemoryOrder::RELEASE, cpp::MemoryScope::DEVICE);
+  }
};
+
+// It is reported that atomic operations with higher-order semantics
+// lead to better performance on GPUs.
+#ifdef LIBC_TARGET_ARCH_IS_GPU
+using SpinLock =
+ SpinLockAdaptor<unsigned int, &cpp::Atomic<unsigned int>::fetch_or,
+ &cpp::Atomic<unsigned int>::fetch_and>;
+#else
+using SpinLock = SpinLockAdaptor<bool, &cpp::Atomic<bool>::exchange,
+ &cpp::Atomic<bool>::store>;
+#endif
+
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H
More information about the libc-commits
mailing list