[llvm] [NVPTX] Fixup some issues introduced by 128-bit atomics (PR #155921)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 28 14:06:30 PDT 2025
https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/155921
None
From 206024f6783915b58f796f1cc3a8b8e04937d8a1 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 28 Aug 2025 21:07:27 +0000
Subject: [PATCH] [NVPTX] Fixup some issues introduced by 128-bit atomics
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 2 ++
llvm/test/CodeGen/NVPTX/atomics-b128.ll | 30 +++++++++++++++++++
.../CodeGen/NVPTX/load-store-atomic.err.ll | 10 -------
3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 5ac45fef851f8..c70f48af33cf2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2325,6 +2325,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
MemSDNode *AN = cast<MemSDNode>(N);
SDLoc dl(N);
+ const SDValue Chain = N->getOperand(0);
const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
SmallVector<SDValue, 5> Ops{Base, Offset};
Ops.append(N->op_begin() + 2, N->op_end());
@@ -2332,6 +2333,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
getI32Imm(getMemOrder(AN), dl),
getI32Imm(getAtomicScope(AN), dl),
getI32Imm(getAddrSpace(AN), dl),
+ Chain,
});
assert(N->getOpcode() == NVPTXISD::ATOMIC_CMP_SWAP_B128 ||
diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
index 7cae7ebb642b3..eeed83b6f7927 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-b128.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
@@ -458,6 +458,7 @@ define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2];
; CHECK-NEXT: {
@@ -524,6 +525,7 @@ define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2];
; CHECK-NEXT: {
@@ -590,6 +592,7 @@ define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2];
; CHECK-NEXT: {
@@ -656,6 +659,7 @@ define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2];
; CHECK-NEXT: {
@@ -678,6 +682,7 @@ define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2];
; CHECK-NEXT: {
@@ -700,6 +705,7 @@ define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2];
; CHECK-NEXT: {
@@ -722,6 +728,7 @@ define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2];
; CHECK-NEXT: {
@@ -1001,3 +1008,26 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
%ret = atomicrmw umax ptr %ptr, i128 %val monotonic
ret i128 %ret
}
+
+
+@si128 = internal addrspace(3) global i128 0, align 16
+
+define void @test_atomicrmw_xchg_const() {
+; CHECK-LABEL: test_atomicrmw_xchg_const(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-NEXT: // demoted variable
+; CHECK-NEXT: .shared .align 16 .b8 si128[16];
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.b64 %rd1, 0;
+; CHECK-NEXT: mov.b64 %rd2, 23;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd1};
+; CHECK-NEXT: atom.seq_cst.sys.shared.exch.b128 dst, [si128], amt;
+; CHECK-NEXT: mov.b128 {%rd3, %rd4}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: ret;
+ %res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
index a295356d44fab..31889e25142ad 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
@@ -2,16 +2,6 @@
; CHECK: error: unsupported atomic store
; CHECK: error: unsupported atomic load
-; CHECK: error: unsupported atomic store
-; CHECK: error: unsupported atomic load
-
-;; TODO: we could actually support this but we don't currently support b128
-;; load lowering.
-define void @test_i128_generic_atomic(ptr %a, ptr %b) {
- %a.load = load atomic i128, ptr %a seq_cst, align 16
- store atomic i128 %a.load, ptr %b seq_cst, align 16
- ret void
-}
define void @test_i256_global_atomic(ptr addrspace(1) %a, ptr addrspace(1) %b) {
%a.load = load atomic i256, ptr addrspace(1) %a seq_cst, align 32
More information about the llvm-commits
mailing list