[clang] [Clang] Fix clang to emit llvm-ir for fadd/fsub atomics (PR #162679)
Amina Chabane via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 27 10:15:05 PDT 2025
https://github.com/Amichaxx updated https://github.com/llvm/llvm-project/pull/162679
>From e654aa7c7d051e355c391f16c5ff3e152fa5784f Mon Sep 17 00:00:00 2001
From: Amichaxx <amina.chabane at arm.com>
Date: Mon, 6 Oct 2025 15:42:18 +0000
Subject: [PATCH 1/3] [LLVM] Fix clang to emit llvm-ir for fadd/fsub atomics
Currently, Clang emits CAS loops for atomic fp compound assignments, instead of atomicrmw instructions. The code in CGExprScalar.cpp now checks for both integer and floating-point
atomic types and emits atomicrmw fadd/fsub instructions in the LLVM IR.
---
clang/lib/CodeGen/CGExprScalar.cpp | 24 +++++++--
clang/test/CodeGen/aarch64-lsfe-atomics.c | 61 +++++++++++++++++++++++
2 files changed, 80 insertions(+), 5 deletions(-)
create mode 100644 clang/test/CodeGen/aarch64-lsfe-atomics.c
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index f319b176513f8..0be5f230406ec 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3847,7 +3847,17 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
llvm::PHINode *atomicPHI = nullptr;
if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
QualType type = atomicTy->getValueType();
- if (!type->isBooleanType() && type->isIntegerType() &&
+ const bool isFloat = type->isFloatingType();
+ const bool isInteger = type->isIntegerType();
+
+ bool isPowerOfTwo = false;
+ if (isFloat || isInteger) {
+ llvm::Type *IRTy = CGF.ConvertType(type);
+ uint64_t StoreBits =
+ CGF.CGM.getDataLayout().getTypeStoreSizeInBits(IRTy);
+ isPowerOfTwo = llvm::isPowerOf2_64(StoreBits);
+ }
+ if (!type->isBooleanType() && (isInteger || isFloat) && isPowerOfTwo &&
!(type->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) &&
CGF.getLangOpts().getSignedOverflowBehavior() !=
@@ -3862,12 +3872,16 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
case BO_ShrAssign:
break;
case BO_AddAssign:
- AtomicOp = llvm::AtomicRMWInst::Add;
- Op = llvm::Instruction::Add;
+ AtomicOp = isFloat ? llvm::AtomicRMWInst::FAdd
+ : llvm::AtomicRMWInst::Add;
+ Op = isFloat ? llvm::Instruction::FAdd
+ : llvm::Instruction::Add;
break;
case BO_SubAssign:
- AtomicOp = llvm::AtomicRMWInst::Sub;
- Op = llvm::Instruction::Sub;
+ AtomicOp = isFloat ? llvm::AtomicRMWInst::FSub
+ : llvm::AtomicRMWInst::Sub;
+ Op = isFloat ? llvm::Instruction::FSub
+ : llvm::Instruction::Sub;
break;
case BO_AndAssign:
AtomicOp = llvm::AtomicRMWInst::And;
diff --git a/clang/test/CodeGen/aarch64-lsfe-atomics.c b/clang/test/CodeGen/aarch64-lsfe-atomics.c
new file mode 100644
index 0000000000000..957e960b3fe3a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-lsfe-atomics.c
@@ -0,0 +1,61 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=aarch64-linux-gnu | FileCheck %s --check-prefix=CHECK-LLVM
+
+_Atomic(float) f;
+_Atomic(double) d;
+
+// CHECK-LLVM-LABEL: define dso_local void @test_float_add(
+// CHECK-LLVM-SAME: float noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
+// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4
+// CHECK-LLVM-NEXT: store float [[VAL]], ptr [[VAL_ADDR]], align 4
+// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load float, ptr [[VAL_ADDR]], align 4
+// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr @f, float [[TMP0]] seq_cst, align 4
+// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], [[TMP0]]
+// CHECK-LLVM-NEXT: ret void
+//
+void test_float_add(float val) {
+ f += val;
+}
+
+// CHECK-LLVM-LABEL: define dso_local void @test_double_add(
+// CHECK-LLVM-SAME: double noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
+// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca double, align 8
+// CHECK-LLVM-NEXT: store double [[VAL]], ptr [[VAL_ADDR]], align 8
+// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load double, ptr [[VAL_ADDR]], align 8
+// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr @d, double [[TMP0]] seq_cst, align 8
+// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fadd double [[TMP1]], [[TMP0]]
+// CHECK-LLVM-NEXT: ret void
+//
+void test_double_add(double val) {
+ d += val;
+}
+
+// CHECK-LLVM-LABEL: define dso_local void @test_float_sub(
+// CHECK-LLVM-SAME: float noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
+// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4
+// CHECK-LLVM-NEXT: store float [[VAL]], ptr [[VAL_ADDR]], align 4
+// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load float, ptr [[VAL_ADDR]], align 4
+// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr @f, float [[TMP0]] seq_cst, align 4
+// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fsub float [[TMP1]], [[TMP0]]
+// CHECK-LLVM-NEXT: ret void
+//
+void test_float_sub(float val) {
+ f -= val;
+}
+
+// CHECK-LLVM-LABEL: define dso_local void @test_double_sub(
+// CHECK-LLVM-SAME: double noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
+// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca double, align 8
+// CHECK-LLVM-NEXT: store double [[VAL]], ptr [[VAL_ADDR]], align 8
+// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load double, ptr [[VAL_ADDR]], align 8
+// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr @d, double [[TMP0]] seq_cst, align 8
+// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fsub double [[TMP1]], [[TMP0]]
+// CHECK-LLVM-NEXT: ret void
+//
+void test_double_sub(double val){
+ d -= val;
+}
>From 653b974e21051fa663ba9196f23baa68c30d7561 Mon Sep 17 00:00:00 2001
From: Amichaxx <amina.chabane at arm.com>
Date: Thu, 9 Oct 2025 15:32:00 +0000
Subject: [PATCH 2/3] clang format
---
clang/lib/CodeGen/CGExprScalar.cpp | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 0be5f230406ec..4028a91ad7639 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3847,14 +3847,13 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
llvm::PHINode *atomicPHI = nullptr;
if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
QualType type = atomicTy->getValueType();
- const bool isFloat = type->isFloatingType();
+ const bool isFloat = type->isFloatingType();
const bool isInteger = type->isIntegerType();
bool isPowerOfTwo = false;
if (isFloat || isInteger) {
llvm::Type *IRTy = CGF.ConvertType(type);
- uint64_t StoreBits =
- CGF.CGM.getDataLayout().getTypeStoreSizeInBits(IRTy);
+ uint64_t StoreBits = CGF.CGM.getDataLayout().getTypeStoreSizeInBits(IRTy);
isPowerOfTwo = llvm::isPowerOf2_64(StoreBits);
}
if (!type->isBooleanType() && (isInteger || isFloat) && isPowerOfTwo &&
@@ -3872,16 +3871,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
case BO_ShrAssign:
break;
case BO_AddAssign:
- AtomicOp = isFloat ? llvm::AtomicRMWInst::FAdd
- : llvm::AtomicRMWInst::Add;
- Op = isFloat ? llvm::Instruction::FAdd
- : llvm::Instruction::Add;
+ AtomicOp =
+ isFloat ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::Add;
+ Op = isFloat ? llvm::Instruction::FAdd : llvm::Instruction::Add;
break;
case BO_SubAssign:
- AtomicOp = isFloat ? llvm::AtomicRMWInst::FSub
- : llvm::AtomicRMWInst::Sub;
- Op = isFloat ? llvm::Instruction::FSub
- : llvm::Instruction::Sub;
+ AtomicOp =
+ isFloat ? llvm::AtomicRMWInst::FSub : llvm::AtomicRMWInst::Sub;
+ Op = isFloat ? llvm::Instruction::FSub : llvm::Instruction::Sub;
break;
case BO_AndAssign:
AtomicOp = llvm::AtomicRMWInst::And;
>From 52d55964c53ee8d531f3b3e032fe9f135e81bde6 Mon Sep 17 00:00:00 2001
From: Amichaxx <amina.chabane at arm.com>
Date: Wed, 22 Oct 2025 10:07:38 +0000
Subject: [PATCH 3/3] Comments addressing
- Modified conditional
- Added bf16 and fp16 tests
---
clang/lib/CodeGen/CGExprScalar.cpp | 2 +-
clang/test/CodeGen/aarch64-lsfe-atomics.c | 64 ++++++++++++++++++++++-
2 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 4028a91ad7639..1bdab1563dda2 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3856,7 +3856,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
uint64_t StoreBits = CGF.CGM.getDataLayout().getTypeStoreSizeInBits(IRTy);
isPowerOfTwo = llvm::isPowerOf2_64(StoreBits);
}
- if (!type->isBooleanType() && (isInteger || isFloat) && isPowerOfTwo &&
+ if (!type->isBooleanType() && (isInteger || (isPowerOfTwo && isFloat)) &&
!(type->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) &&
CGF.getLangOpts().getSignedOverflowBehavior() !=
diff --git a/clang/test/CodeGen/aarch64-lsfe-atomics.c b/clang/test/CodeGen/aarch64-lsfe-atomics.c
index 957e960b3fe3a..428d038df198b 100644
--- a/clang/test/CodeGen/aarch64-lsfe-atomics.c
+++ b/clang/test/CodeGen/aarch64-lsfe-atomics.c
@@ -3,6 +3,8 @@
_Atomic(float) f;
_Atomic(double) d;
+_Atomic(__bf16) bf;
+_Atomic(_Float16) h;
// CHECK-LLVM-LABEL: define dso_local void @test_float_add(
// CHECK-LLVM-SAME: float noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -32,6 +34,36 @@ void test_double_add(double val) {
d += val;
}
+// CHECK-LLVM-LABEL: define dso_local void @test_bf16_add(
+// CHECK-LLVM-SAME: bfloat noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
+// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LLVM-NEXT: store bfloat [[VAL]], ptr [[VAL_ADDR]], align 2
+// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[VAL_ADDR]], align 2
+// CHECK-LLVM-NEXT: [[EXT:%.*]] = fpext bfloat [[TMP0]] to float
+// CHECK-LLVM-NEXT: [[CONV:%.*]] = fptrunc float [[EXT]] to bfloat
+// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr @bf, bfloat [[CONV]] seq_cst, align 2
+// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fadd bfloat [[TMP1]], [[CONV]]
+// CHECK-LLVM-NEXT: ret void
+//
+void test_bf16_add(__bf16 val){
+ bf += val;
+}
+
+// CHECK-LLVM-LABEL: define dso_local void @test_f16_add(
+// CHECK-LLVM-SAME: half noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
+// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca half, align 2
+// CHECK-LLVM-NEXT: store half [[VAL]], ptr [[VAL_ADDR]], align 2
+// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load half, ptr [[VAL_ADDR]], align 2
+// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr @h, half [[TMP0]] seq_cst, align 2
+// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fadd half [[TMP1]], [[TMP0]]
+// CHECK-LLVM-NEXT: ret void
+//
+void test_f16_add(_Float16 val){
+ h += val;
+}
+
// CHECK-LLVM-LABEL: define dso_local void @test_float_sub(
// CHECK-LLVM-SAME: float noundef [[VAL:%.*]]) #[[ATTR0]] {
// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
@@ -57,5 +89,35 @@ void test_float_sub(float val) {
// CHECK-LLVM-NEXT: ret void
//
void test_double_sub(double val){
- d -= val;
+ d -= val;
+}
+
+// CHECK-LLVM-LABEL: define dso_local void @test_bf16_sub(
+// CHECK-LLVM-SAME: bfloat noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
+// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-LLVM-NEXT: store bfloat [[VAL]], ptr [[VAL_ADDR]], align 2
+// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[VAL_ADDR]], align 2
+// CHECK-LLVM-NEXT: [[EXT:%.*]] = fpext bfloat [[TMP0]] to float
+// CHECK-LLVM-NEXT: [[CONV:%.*]] = fptrunc float [[EXT]] to bfloat
+// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr @bf, bfloat [[CONV]] seq_cst, align 2
+// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fsub bfloat [[TMP1]], [[CONV]]
+// CHECK-LLVM-NEXT: ret void
+//
+void test_bf16_sub(__bf16 val){
+ bf -= val;
+}
+
+// CHECK-LLVM-LABEL: define dso_local void @test_f16_sub(
+// CHECK-LLVM-SAME: half noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-LLVM-NEXT: [[ENTRY:.*:]]
+// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca half, align 2
+// CHECK-LLVM-NEXT: store half [[VAL]], ptr [[VAL_ADDR]], align 2
+// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load half, ptr [[VAL_ADDR]], align 2
+// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr @h, half [[TMP0]] seq_cst, align 2
+// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fsub half [[TMP1]], [[TMP0]]
+// CHECK-LLVM-NEXT: ret void
+//
+void test_f16_sub(_Float16 val){
+ h -= val;
}
More information about the cfe-commits
mailing list