[llvm] [NVPTX] Attempt to load params using symbol addition node directly (PR #119935)
Kevin McAfee via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 08:58:54 PST 2025
https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/119935
>From 7a89674bbee410446ef105fb5af0ca2ece571ce8 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 13 Dec 2024 14:48:08 -0800
Subject: [PATCH 1/2] Fix
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 24 ++++++---
llvm/test/CodeGen/NVPTX/param-add.ll | 54 +++++++++++++++++++++
2 files changed, 71 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/CodeGen/NVPTX/param-add.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 39f55716920584..2e66b67dfdcc76 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2482,15 +2482,25 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
SDValue &Base, SDValue &Offset,
MVT VT) {
- if (isAddLike(Addr)) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- SDValue base = Addr.getOperand(0);
- if (SelectDirectAddr(base, Base)) {
- Offset =
- CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), VT);
- return true;
+ std::function<std::optional<uint64_t>(SDValue, uint64_t)>
+ FindRootAddressAndTotalOffset =
+ [&](SDValue Addr,
+ uint64_t AccumulatedOffset) -> std::optional<uint64_t> {
+ if (isAddLike(Addr)) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ SDValue PossibleBaseAddr = Addr.getOperand(0);
+ AccumulatedOffset += CN->getZExtValue();
+ if (SelectDirectAddr(PossibleBaseAddr, Base))
+ return AccumulatedOffset;
+ return FindRootAddressAndTotalOffset(PossibleBaseAddr,
+ AccumulatedOffset);
}
}
+ return std::nullopt;
+ };
+ if (auto AccumulatedOffset = FindRootAddressAndTotalOffset(Addr, 0)) {
+ Offset = CurDAG->getTargetConstant(*AccumulatedOffset, SDLoc(OpNode), VT);
+ return true;
}
return false;
}
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
new file mode 100644
index 00000000000000..96206baa489329
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
+
+; REQUIRES: asserts
+; asserts are required for --debug-counter=dagcombine=0 to have the intended
+; effect of disabling DAG combines, which exposes the bug. When combines are
+; enabled the bug does not occur.
+
+%struct.1float = type <{ [1 x float] }>
+
+declare i32 @callee(%struct.1float %a)
+
+define i32 @test(%struct.1float alignstack(32) %data) {
+; CHECK-LABEL: test(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<18>;
+; CHECK-NEXT: .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u8 %r1, [test_param_0+1];
+; CHECK-NEXT: shl.b32 %r2, %r1, 8;
+; CHECK-NEXT: ld.param.u8 %r3, [test_param_0];
+; CHECK-NEXT: or.b32 %r4, %r2, %r3;
+; CHECK-NEXT: ld.param.u8 %r5, [test_param_0+3];
+; CHECK-NEXT: shl.b32 %r6, %r5, 8;
+; CHECK-NEXT: ld.param.u8 %r7, [test_param_0+2];
+; CHECK-NEXT: or.b32 %r8, %r6, %r7;
+; CHECK-NEXT: shl.b32 %r9, %r8, 16;
+; CHECK-NEXT: or.b32 %r17, %r9, %r4;
+; CHECK-NEXT: mov.b32 %f1, %r17;
+; CHECK-NEXT: shr.u32 %r12, %r17, 8;
+; CHECK-NEXT: shr.u32 %r13, %r17, 16;
+; CHECK-NEXT: shr.u32 %r14, %r17, 24;
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .align 1 .b8 param0[4];
+; CHECK-NEXT: st.param.b8 [param0], %r17;
+; CHECK-NEXT: st.param.b8 [param0+1], %r12;
+; CHECK-NEXT: st.param.b8 [param0+2], %r13;
+; CHECK-NEXT: st.param.b8 [param0+3], %r14;
+; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: call.uni (retval0),
+; CHECK-NEXT: callee,
+; CHECK-NEXT: (
+; CHECK-NEXT: param0
+; CHECK-NEXT: );
+; CHECK-NEXT: ld.param.b32 %r15, [retval0];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
+; CHECK-NEXT: ret;
+
+ %1 = call i32 @callee(%struct.1float %data)
+ ret i32 %1
+}
>From 8e732716eeedc0e7926696f86e1fff195f68a440 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Mon, 13 Jan 2025 16:58:29 +0000
Subject: [PATCH 2/2] Add missing option to ptxas-verify line in test
---
llvm/test/CodeGen/NVPTX/param-add.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
index 96206baa489329..afabc113541c27 100644
--- a/llvm/test/CodeGen/NVPTX/param-add.ll
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | %ptxas-verify %}
; REQUIRES: asserts
; asserts are required for --debug-counter=dagcombine=0 to have the intended
More information about the llvm-commits
mailing list