[llvm] [NVPTX] Attempt to load params using symbol addition node directly (PR #119935)

Kevin McAfee via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 13 08:58:54 PST 2025


https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/119935

>From 7a89674bbee410446ef105fb5af0ca2ece571ce8 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 13 Dec 2024 14:48:08 -0800
Subject: [PATCH 1/2] Fix

---
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 24 ++++++---
 llvm/test/CodeGen/NVPTX/param-add.ll        | 54 +++++++++++++++++++++
 2 files changed, 71 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/param-add.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 39f55716920584..2e66b67dfdcc76 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2482,15 +2482,25 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
                                          SDValue &Base, SDValue &Offset,
                                          MVT VT) {
-  if (isAddLike(Addr)) {
-    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
-      SDValue base = Addr.getOperand(0);
-      if (SelectDirectAddr(base, Base)) {
-        Offset =
-            CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), VT);
-        return true;
+  std::function<std::optional<uint64_t>(SDValue, uint64_t)>
+      FindRootAddressAndTotalOffset =
+          [&](SDValue Addr,
+              uint64_t AccumulatedOffset) -> std::optional<uint64_t> {
+    if (isAddLike(Addr)) {
+      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+        SDValue PossibleBaseAddr = Addr.getOperand(0);
+        AccumulatedOffset += CN->getZExtValue();
+        if (SelectDirectAddr(PossibleBaseAddr, Base))
+          return AccumulatedOffset;
+        return FindRootAddressAndTotalOffset(PossibleBaseAddr,
+                                             AccumulatedOffset);
       }
     }
+    return std::nullopt;
+  };
+  if (auto AccumulatedOffset = FindRootAddressAndTotalOffset(Addr, 0)) {
+    Offset = CurDAG->getTargetConstant(*AccumulatedOffset, SDLoc(OpNode), VT);
+    return true;
   }
   return false;
 }
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
new file mode 100644
index 00000000000000..96206baa489329
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
+
+; REQUIRES: asserts
+; asserts are required for --debug-counter=dagcombine=0 to have the intended
+; effect of disabling DAG combines, which exposes the bug. When combines are
+; enabled the bug does not occur.
+
+%struct.1float = type <{ [1 x float] }>
+
+declare i32 @callee(%struct.1float %a)
+
+define i32 @test(%struct.1float alignstack(32) %data) {
+; CHECK-LABEL: test(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<18>;
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %r1, [test_param_0+1];
+; CHECK-NEXT:    shl.b32 %r2, %r1, 8;
+; CHECK-NEXT:    ld.param.u8 %r3, [test_param_0];
+; CHECK-NEXT:    or.b32 %r4, %r2, %r3;
+; CHECK-NEXT:    ld.param.u8 %r5, [test_param_0+3];
+; CHECK-NEXT:    shl.b32 %r6, %r5, 8;
+; CHECK-NEXT:    ld.param.u8 %r7, [test_param_0+2];
+; CHECK-NEXT:    or.b32 %r8, %r6, %r7;
+; CHECK-NEXT:    shl.b32 %r9, %r8, 16;
+; CHECK-NEXT:    or.b32 %r17, %r9, %r4;
+; CHECK-NEXT:    mov.b32 %f1, %r17;
+; CHECK-NEXT:    shr.u32 %r12, %r17, 8;
+; CHECK-NEXT:    shr.u32 %r13, %r17, 16;
+; CHECK-NEXT:    shr.u32 %r14, %r17, 24;
+; CHECK-NEXT:    { // callseq 0, 0
+; CHECK-NEXT:    .param .align 1 .b8 param0[4];
+; CHECK-NEXT:    st.param.b8 [param0], %r17;
+; CHECK-NEXT:    st.param.b8 [param0+1], %r12;
+; CHECK-NEXT:    st.param.b8 [param0+2], %r13;
+; CHECK-NEXT:    st.param.b8 [param0+3], %r14;
+; CHECK-NEXT:    .param .b32 retval0;
+; CHECK-NEXT:    call.uni (retval0),
+; CHECK-NEXT:    callee,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    ld.param.b32 %r15, [retval0];
+; CHECK-NEXT:    } // callseq 0
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r15;
+; CHECK-NEXT:    ret;
+
+  %1 = call i32 @callee(%struct.1float %data)
+  ret i32 %1
+}

>From 8e732716eeedc0e7926696f86e1fff195f68a440 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Mon, 13 Jan 2025 16:58:29 +0000
Subject: [PATCH 2/2] Add missing option to ptxas-verify line in test

---
 llvm/test/CodeGen/NVPTX/param-add.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
index 96206baa489329..afabc113541c27 100644
--- a/llvm/test/CodeGen/NVPTX/param-add.ll
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | %ptxas-verify %}
 
 ; REQUIRES: asserts
 ; asserts are required for --debug-counter=dagcombine=0 to have the intended



More information about the llvm-commits mailing list