[llvm] [NVPTX] Fixup some issues introduced by 128-bit atomics (PR #155921)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 28 14:06:30 PDT 2025


https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/155921

None

>From 206024f6783915b58f796f1cc3a8b8e04937d8a1 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 28 Aug 2025 21:07:27 +0000
Subject: [PATCH] [NVPTX] Fixup some issues introduced by 128-bit atomics

---
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp   |  2 ++
 llvm/test/CodeGen/NVPTX/atomics-b128.ll       | 30 +++++++++++++++++++
 .../CodeGen/NVPTX/load-store-atomic.err.ll    | 10 -------
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 5ac45fef851f8..c70f48af33cf2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2325,6 +2325,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
   MemSDNode *AN = cast<MemSDNode>(N);
   SDLoc dl(N);
 
+  const SDValue Chain = N->getOperand(0);
   const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
   SmallVector<SDValue, 5> Ops{Base, Offset};
   Ops.append(N->op_begin() + 2, N->op_end());
@@ -2332,6 +2333,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
       getI32Imm(getMemOrder(AN), dl),
       getI32Imm(getAtomicScope(AN), dl),
       getI32Imm(getAddrSpace(AN), dl),
+      Chain,
   });
 
   assert(N->getOpcode() == NVPTXISD::ATOMIC_CMP_SWAP_B128 ||
diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
index 7cae7ebb642b3..eeed83b6f7927 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-b128.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
@@ -458,6 +458,7 @@ define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -524,6 +525,7 @@ define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -590,6 +592,7 @@ define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -656,6 +659,7 @@ define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -678,6 +682,7 @@ define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2];
 ; CHECK-NEXT:    {
@@ -700,6 +705,7 @@ define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2];
 ; CHECK-NEXT:    {
@@ -722,6 +728,7 @@ define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -1001,3 +1008,26 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
   %ret = atomicrmw umax ptr %ptr, i128 %val monotonic
   ret i128 %ret
 }
+
+
+@si128 = internal addrspace(3) global i128 0, align 16
+
+define void @test_atomicrmw_xchg_const() {
+; CHECK-LABEL: test_atomicrmw_xchg_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-NEXT:    // demoted variable
+; CHECK-NEXT:    .shared .align 16 .b8 si128[16];
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b64 %rd1, 0;
+; CHECK-NEXT:    mov.b64 %rd2, 23;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 amt, dst;
+; CHECK-NEXT:    mov.b128 amt, {%rd2, %rd1};
+; CHECK-NEXT:    atom.seq_cst.sys.shared.exch.b128 dst, [si128], amt;
+; CHECK-NEXT:    mov.b128 {%rd3, %rd4}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    ret;
+	%res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 seq_cst
+  ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
index a295356d44fab..31889e25142ad 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
@@ -2,16 +2,6 @@
 
 ; CHECK: error: unsupported atomic store
 ; CHECK: error: unsupported atomic load
-; CHECK: error: unsupported atomic store
-; CHECK: error: unsupported atomic load
-
-;; TODO: we could actually support this but we don't currently support b128
-;;       load lowering.
-define void @test_i128_generic_atomic(ptr %a, ptr %b) {
-  %a.load = load atomic i128, ptr %a seq_cst, align 16
-  store atomic i128 %a.load, ptr %b seq_cst, align 16
-  ret void
-}
 
 define void @test_i256_global_atomic(ptr addrspace(1) %a, ptr addrspace(1) %b) {
   %a.load = load atomic i256, ptr addrspace(1) %a seq_cst, align 32



More information about the llvm-commits mailing list