[clang] [clang][CodeGen] Fix sub-optimal clang CodeGen for __atomic_test_and_set (PR #160098)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 22 06:21:41 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Sirui Mu (Lancern)
<details>
<summary>Changes</summary>
Clang CodeGen for `__atomic_test_and_set` would emit a `store` instruction that stores an `i1` value:
```cpp
bool f(void *ptr) {
return __atomic_test_and_set(ptr, __ATOMIC_RELAXED);
}
```
```llvm
%1 = atomicrmw xchg ptr %0, i8 1 monotonic, align 1
%tobool = icmp ne i8 %1, 0
store i1 %tobool, ptr %atomic-temp, align 1
```
which could lead to suboptimal binary code, for example on x86_64:
```asm
f:
mov al, 1
xchg byte ptr [rdi], al
test al, al
setne al
setne byte ptr [rsp - 1]
ret
```
The last `setne` instruction is obviously redundant. This patch fixes this issue by first zero-extending `%tobool` to an `i8` before the store. This effectively eliminates the last `setne` instruction in the binary code sequence. The `test` and `setne` on `al` is kept still, though.
-----
I'm quite conservative about the codegen in this patch. Vanilla gcc actually emits simpler code for `__atomic_test_and_set`:
```cpp
bool f(void *ptr) {
return __atomic_test_and_set(ptr, __ATOMIC_RELAXED);
}
```
```asm
f:
mov eax, 1
xchg al, BYTE PTR [rdi]
ret
```
It seems like gcc assumes `ptr` would always point to a valid `bool` value as required by the ABI. I'm not sure if we should also make this assumption.
Related to #<!-- -->121943 .
---
Full diff: https://github.com/llvm/llvm-project/pull/160098.diff
2 Files Affected:
- (modified) clang/lib/CodeGen/CGAtomic.cpp (+4-2)
- (modified) clang/test/CodeGen/atomic-test-and-set.c (+30-15)
``````````diff
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 9106c4cd8e139..d44f8e1ea40eb 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -735,8 +735,10 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
CGF.Builder.getInt8(1), Order, Scope, E);
RMWI->setVolatile(E->isVolatile());
llvm::Value *Result = CGF.Builder.CreateIsNotNull(RMWI, "tobool");
- auto *I = CGF.Builder.CreateStore(Result, Dest);
- CGF.addInstToCurrentSourceAtom(I, Result);
+ llvm::Value *ExtResult =
+ CGF.Builder.CreateZExt(Result, CGF.Builder.getInt8Ty(), "tobool.zext");
+ auto *I = CGF.Builder.CreateStore(ExtResult, Dest);
+ CGF.addInstToCurrentSourceAtom(I, ExtResult);
return;
}
diff --git a/clang/test/CodeGen/atomic-test-and-set.c b/clang/test/CodeGen/atomic-test-and-set.c
index 39d4cef16b21d..6438094567f33 100644
--- a/clang/test/CodeGen/atomic-test-and-set.c
+++ b/clang/test/CodeGen/atomic-test-and-set.c
@@ -81,7 +81,8 @@ void clear_dynamic(char *ptr, int order) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -99,7 +100,8 @@ void test_and_set_relaxed(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -117,7 +119,8 @@ void test_and_set_consume(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -135,7 +138,8 @@ void test_and_set_acquire(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 release, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -153,7 +157,8 @@ void test_and_set_release(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acq_rel, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -171,7 +176,8 @@ void test_and_set_acq_rel(char *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 seq_cst, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -200,27 +206,32 @@ void test_and_set_seq_cst(char *ptr) {
// CHECK: [[MONOTONIC]]:
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP2]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE:.*]]
// CHECK: [[ACQUIRE]]:
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i8 [[TMP3]], 0
-// CHECK-NEXT: store i1 [[TOBOOL1]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT1:%.*]] = zext i1 [[TOBOOL1]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT1]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
// CHECK: [[RELEASE]]:
// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 release, align 1
// CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i8 [[TMP4]], 0
-// CHECK-NEXT: store i1 [[TOBOOL2]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT2:%.*]] = zext i1 [[TOBOOL2]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT2]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
// CHECK: [[ACQREL]]:
// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acq_rel, align 1
// CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i8 [[TMP5]], 0
-// CHECK-NEXT: store i1 [[TOBOOL3]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT3:%.*]] = zext i1 [[TOBOOL3]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT3]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
// CHECK: [[SEQCST]]:
// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 seq_cst, align 1
// CHECK-NEXT: [[TOBOOL4:%.*]] = icmp ne i8 [[TMP6]], 0
-// CHECK-NEXT: store i1 [[TOBOOL4]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT4:%.*]] = zext i1 [[TOBOOL4]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT4]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
// CHECK: [[ATOMIC_CONTINUE]]:
// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
@@ -239,7 +250,8 @@ void test_and_set_dynamic(char *ptr, int order) {
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[X]], i64 0, i64 0
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg ptr [[ARRAYDECAY]], i8 1 seq_cst, align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP0]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
// CHECK-NEXT: ret void
@@ -301,7 +313,8 @@ void clear_incomplete(struct incomplete *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -318,7 +331,8 @@ void test_and_set_int(int *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
@@ -335,7 +349,8 @@ void test_and_set_void(void *ptr) {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
-// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: ret void
``````````
</details>
https://github.com/llvm/llvm-project/pull/160098
More information about the cfe-commits
mailing list