[clang] [llvm] [NVPTX] Auto-Upgrade llvm.nvvm.atomic.load.{inc,dec}.32 (PR #134111)
Alex MacLean via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 8 10:23:50 PDT 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/134111
>From 46de785e801bf8ca87e01aee9ad0a13ac07a47d6 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 1 Apr 2025 20:22:24 +0000
Subject: [PATCH] [NVPTX] Auto-Upgrade llvm.nvvm.atomic.load.{inc,dec}.32
---
clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp | 18 ++-----
clang/test/CodeGen/builtins-nvptx.c | 4 +-
llvm/include/llvm/IR/IntrinsicsNVVM.td | 10 +---
.../include/llvm/Target/TargetSelectionDAG.td | 2 +
llvm/lib/IR/AutoUpgrade.cpp | 9 ++++
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 15 ++++--
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 4 +-
.../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 52 +++++++++----------
.../Assembler/auto_upgrade_nvvm_intrinsics.ll | 16 +++++-
llvm/test/CodeGen/NVPTX/atomics.ll | 36 ++++++++++++-
10 files changed, 107 insertions(+), 59 deletions(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
index aaac19b229905..0f7ab9fd3b099 100644
--- a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
@@ -481,21 +481,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
AtomicOrdering::SequentiallyConsistent);
}
- case NVPTX::BI__nvvm_atom_inc_gen_ui: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Val = EmitScalarExpr(E->getArg(1));
- Function *FnALI32 =
- CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
- return Builder.CreateCall(FnALI32, {Ptr, Val});
- }
+ case NVPTX::BI__nvvm_atom_inc_gen_ui:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UIncWrap, E);
- case NVPTX::BI__nvvm_atom_dec_gen_ui: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- Value *Val = EmitScalarExpr(E->getArg(1));
- Function *FnALD32 =
- CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
- return Builder.CreateCall(FnALD32, {Ptr, Val});
- }
+ case NVPTX::BI__nvvm_atom_dec_gen_ui:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UDecWrap, E);
case NVPTX::BI__nvvm_ldg_c:
case NVPTX::BI__nvvm_ldg_sc:
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index ffa41c85c2734..71b29849618b6 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -333,10 +333,10 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, double df,
// CHECK: atomicrmw fadd ptr {{.*}} seq_cst, align 4
__nvvm_atom_add_gen_f(fp, f);
- // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0
+ // CHECK: atomicrmw uinc_wrap ptr {{.*}} seq_cst, align 4
__nvvm_atom_inc_gen_ui(uip, ui);
- // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0
+ // CHECK: atomicrmw udec_wrap ptr {{.*}} seq_cst, align 4
__nvvm_atom_dec_gen_ui(uip, ui);
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 3e9588a515c9e..4aeb1d8a2779e 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -124,6 +124,8 @@
// * llvm.nvvm.ldg.global.f --> ibid.
// * llvm.nvvm.ldg.global.p --> ibid.
// * llvm.nvvm.swap.lo.hi.b64 --> llvm.fshl(x, x, 32)
+// * llvm.nvvm.atomic.load.inc.32 --> atomicrmw uinc_wrap
+// * llvm.nvvm.atomic.load.dec.32 --> atomicrmw udec_wrap
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
@@ -1633,14 +1635,6 @@ let TargetPrefix = "nvvm" in {
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-// Atomics not available as llvm intrinsics.
- def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
- [llvm_anyptr_ty, llvm_i32_ty],
- [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
- def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
- [llvm_anyptr_ty, llvm_i32_ty],
- [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
-
class SCOPED_ATOMIC2_impl<LLVMType elty>
: Intrinsic<[elty],
[llvm_anyptr_ty, LLVMMatchType<0>],
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 42a5fbec95174..9c241b6c4df0f 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1825,6 +1825,8 @@ defm atomic_load_min : binary_atomic_op<atomic_load_min>;
defm atomic_load_max : binary_atomic_op<atomic_load_max>;
defm atomic_load_umin : binary_atomic_op<atomic_load_umin>;
defm atomic_load_umax : binary_atomic_op<atomic_load_umax>;
+defm atomic_load_uinc_wrap : binary_atomic_op<atomic_load_uinc_wrap>;
+defm atomic_load_udec_wrap : binary_atomic_op<atomic_load_udec_wrap>;
defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>;
/// Atomic load which zeroes the excess high bits.
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 963fb1b6ad8c0..0b329d91c3c7c 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1302,6 +1302,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
else if (Name.consume_front("atomic.load.add."))
// nvvm.atomic.load.add.{f32.p,f64.p}
Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
+ else if (Name.consume_front("atomic.load.") && Name.consume_back(".32"))
+ // nvvm.atomic.load.{inc,dec}.32
+ Expand = Name == "inc" || Name == "dec";
else if (Name.consume_front("bitcast."))
// nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
Expand =
@@ -2314,6 +2317,12 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
Value *Val = CI->getArgOperand(1);
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
AtomicOrdering::SequentiallyConsistent);
+ } else if (Name.consume_front("atomic.load.") && Name.consume_back(".32")) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+ auto Op = Name == "inc" ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
+ Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
+ AtomicOrdering::SequentiallyConsistent);
} else if (Name.consume_front("max.") &&
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
Name == "ui" || Name == "ull")) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b566cdd4b6bfc..904890b01596d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -4067,9 +4067,6 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
return true;
}
- case Intrinsic::nvvm_atomic_load_inc_32:
- case Intrinsic::nvvm_atomic_load_dec_32:
-
case Intrinsic::nvvm_atomic_add_gen_f_cta:
case Intrinsic::nvvm_atomic_add_gen_f_sys:
case Intrinsic::nvvm_atomic_add_gen_i_cta:
@@ -6145,6 +6142,18 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
default:
llvm_unreachable("unsupported width encountered");
}
+ case AtomicRMWInst::BinOp::UIncWrap:
+ case AtomicRMWInst::BinOp::UDecWrap:
+ switch (ITy->getBitWidth()) {
+ case 32:
+ return AtomicExpansionKind::None;
+ case 8:
+ case 16:
+ case 64:
+ return AtomicExpansionKind::CmpXChg;
+ default:
+ llvm_unreachable("unsupported width encountered");
+ }
}
return AtomicExpansionKind::CmpXChg;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 34cb63e44ca71..8528ff702f236 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2070,8 +2070,8 @@ defm INT_PTX_ATOMIC_UMIN_32 : F_ATOMIC_2_AS<I32RT, atomic_load_umin_i32, "min.u3
defm INT_PTX_ATOMIC_UMIN_64 : F_ATOMIC_2_AS<I64RT, atomic_load_umin_i64, "min.u64", [hasSM<32>]>;
// atom_inc atom_dec
-defm INT_PTX_ATOM_INC_32 : F_ATOMIC_2_AS<I32RT, int_nvvm_atomic_load_inc_32, "inc.u32">;
-defm INT_PTX_ATOM_DEC_32 : F_ATOMIC_2_AS<I32RT, int_nvvm_atomic_load_dec_32, "dec.u32">;
+defm INT_PTX_ATOM_INC_32 : F_ATOMIC_2_AS<I32RT, atomic_load_uinc_wrap_i32, "inc.u32">;
+defm INT_PTX_ATOM_DEC_32 : F_ATOMIC_2_AS<I32RT, atomic_load_udec_wrap_i32, "dec.u32">;
// atom_and
defm INT_PTX_ATOM_AND_32 : F_ATOMIC_2_AS<I32RT, atomic_load_and_i32, "and.b32">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index e359735c20750..81ad01bea8867 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -46,33 +46,31 @@ static bool readsLaneId(const IntrinsicInst *II) {
// Whether the given intrinsic is an atomic instruction in PTX.
static bool isNVVMAtomic(const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::nvvm_atomic_load_inc_32:
- case Intrinsic::nvvm_atomic_load_dec_32:
-
- case Intrinsic::nvvm_atomic_add_gen_f_cta:
- case Intrinsic::nvvm_atomic_add_gen_f_sys:
- case Intrinsic::nvvm_atomic_add_gen_i_cta:
- case Intrinsic::nvvm_atomic_add_gen_i_sys:
- case Intrinsic::nvvm_atomic_and_gen_i_cta:
- case Intrinsic::nvvm_atomic_and_gen_i_sys:
- case Intrinsic::nvvm_atomic_cas_gen_i_cta:
- case Intrinsic::nvvm_atomic_cas_gen_i_sys:
- case Intrinsic::nvvm_atomic_dec_gen_i_cta:
- case Intrinsic::nvvm_atomic_dec_gen_i_sys:
- case Intrinsic::nvvm_atomic_inc_gen_i_cta:
- case Intrinsic::nvvm_atomic_inc_gen_i_sys:
- case Intrinsic::nvvm_atomic_max_gen_i_cta:
- case Intrinsic::nvvm_atomic_max_gen_i_sys:
- case Intrinsic::nvvm_atomic_min_gen_i_cta:
- case Intrinsic::nvvm_atomic_min_gen_i_sys:
- case Intrinsic::nvvm_atomic_or_gen_i_cta:
- case Intrinsic::nvvm_atomic_or_gen_i_sys:
- case Intrinsic::nvvm_atomic_exch_gen_i_cta:
- case Intrinsic::nvvm_atomic_exch_gen_i_sys:
- case Intrinsic::nvvm_atomic_xor_gen_i_cta:
- case Intrinsic::nvvm_atomic_xor_gen_i_sys:
- return true;
+ default:
+ return false;
+ case Intrinsic::nvvm_atomic_add_gen_f_cta:
+ case Intrinsic::nvvm_atomic_add_gen_f_sys:
+ case Intrinsic::nvvm_atomic_add_gen_i_cta:
+ case Intrinsic::nvvm_atomic_add_gen_i_sys:
+ case Intrinsic::nvvm_atomic_and_gen_i_cta:
+ case Intrinsic::nvvm_atomic_and_gen_i_sys:
+ case Intrinsic::nvvm_atomic_cas_gen_i_cta:
+ case Intrinsic::nvvm_atomic_cas_gen_i_sys:
+ case Intrinsic::nvvm_atomic_dec_gen_i_cta:
+ case Intrinsic::nvvm_atomic_dec_gen_i_sys:
+ case Intrinsic::nvvm_atomic_inc_gen_i_cta:
+ case Intrinsic::nvvm_atomic_inc_gen_i_sys:
+ case Intrinsic::nvvm_atomic_max_gen_i_cta:
+ case Intrinsic::nvvm_atomic_max_gen_i_sys:
+ case Intrinsic::nvvm_atomic_min_gen_i_cta:
+ case Intrinsic::nvvm_atomic_min_gen_i_sys:
+ case Intrinsic::nvvm_atomic_or_gen_i_cta:
+ case Intrinsic::nvvm_atomic_or_gen_i_sys:
+ case Intrinsic::nvvm_atomic_exch_gen_i_cta:
+ case Intrinsic::nvvm_atomic_exch_gen_i_sys:
+ case Intrinsic::nvvm_atomic_xor_gen_i_cta:
+ case Intrinsic::nvvm_atomic_xor_gen_i_sys:
+ return true;
}
}
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 588e79a7428a4..74b9640df6977 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -52,6 +52,9 @@ declare i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr, i32)
declare ptr @llvm.nvvm.ldg.global.p.p0(ptr, i32)
declare float @llvm.nvvm.ldg.global.f.f32.p0(ptr, i32)
+declare i32 @llvm.nvvm.atomic.load.inc.32(ptr, i32)
+declare i32 @llvm.nvvm.atomic.load.dec.32(ptr, i32)
+
; CHECK-LABEL: @simple_upgrade
define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
; CHECK: call i32 @llvm.bitreverse.i32(i32 %a)
@@ -224,4 +227,15 @@ define void @ldg(ptr %p0, ptr addrspace(1) %p1) {
%v6 = call float @llvm.nvvm.ldg.global.f.f32.p0(ptr %p0, i32 16)
ret void
-}
\ No newline at end of file
+}
+
+; CHECK-LABEL: @atomics
+define i32 @atomics(ptr %p0, i32 %a) {
+; CHECK: %1 = atomicrmw uinc_wrap ptr %p0, i32 %a seq_cst
+; CHECK: %2 = atomicrmw udec_wrap ptr %p0, i32 %a seq_cst
+
+ %r1 = call i32 @llvm.nvvm.atomic.load.inc.32(ptr %p0, i32 %a)
+ %r2 = call i32 @llvm.nvvm.atomic.load.dec.32(ptr %p0, i32 %a)
+ ret i32 %r2
+}
+
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index e1d9aaf7cfb20..bb04aa856d656 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -313,6 +313,38 @@ define i64 @atom19(ptr %subr, i64 %val) {
ret i64 %ret
}
+define i32 @atom20(ptr %subr, i32 %val) {
+; CHECK-LABEL: atom20(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [atom20_param_0];
+; CHECK-NEXT: ld.param.u32 %r1, [atom20_param_1];
+; CHECK-NEXT: atom.inc.u32 %r2, [%rd1], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %ret = atomicrmw uinc_wrap ptr %subr, i32 %val seq_cst
+ ret i32 %ret
+}
+
+define i32 @atom21(ptr %subr, i32 %val) {
+; CHECK-LABEL: atom21(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [atom21_param_0];
+; CHECK-NEXT: ld.param.u32 %r1, [atom21_param_1];
+; CHECK-NEXT: atom.dec.u32 %r2, [%rd1], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %ret = atomicrmw udec_wrap ptr %subr, i32 %val seq_cst
+ ret i32 %ret
+}
+
declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
; CHECK-LABEL: atomic_add_f32_generic
@@ -409,7 +441,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-NEXT: not.b32 %r2, %r9;
; CHECK-NEXT: ld.u32 %r16, [%rd1];
; CHECK-NEXT: cvt.f32.f16 %f2, %rs1;
-; CHECK-NEXT: $L__BB22_1: // %atomicrmw.start
+; CHECK-NEXT: $L__BB24_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u32 %r10, %r16, %r1;
; CHECK-NEXT: cvt.u16.u32 %rs2, %r10;
@@ -424,7 +456,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r14;
; CHECK-NEXT: setp.ne.s32 %p1, %r5, %r16;
; CHECK-NEXT: mov.b32 %r16, %r5;
-; CHECK-NEXT: @%p1 bra $L__BB22_1;
+; CHECK-NEXT: @%p1 bra $L__BB24_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: shr.u32 %r15, %r5, %r1;
; CHECK-NEXT: cvt.u16.u32 %rs4, %r15;
More information about the cfe-commits
mailing list