[llvm] 83d9ed2 - [NVPTX] Emit `NVPTXISD::DYNAMIC_STACKALLOC`'s chain (#101714)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 5 10:53:36 PDT 2024
Author: Justin Fargnoli
Date: 2024-08-05T10:53:33-07:00
New Revision: 83d9ed29c1401268266e0c0fc012c1b180f70218
URL: https://github.com/llvm/llvm-project/commit/83d9ed29c1401268266e0c0fc012c1b180f70218
DIFF: https://github.com/llvm/llvm-project/commit/83d9ed29c1401268266e0c0fc012c1b180f70218.diff
LOG: [NVPTX] Emit `NVPTXISD::DYNAMIC_STACKALLOC`'s chain (#101714)
`LowerDYNAMIC_STACKALLOC()` returned the incoming chain operand of the
`dynamic_stackalloc` node instead of the chain produced by the new
`NVPTXISD::DYNAMIC_STACKALLOC` node. Fix this by returning the new node's own
chain result, and stop producing an unnecessary `ISD::MERGE_VALUES`.
Added:
llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 6975412ce5d35..516fc7339a4bf 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2232,18 +2232,12 @@ SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDLoc DL(Op.getNode());
// The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32.
- if (nvTM->is64Bit())
- Size = DAG.getZExtOrTrunc(Size, DL, MVT::i64);
- else
- Size = DAG.getZExtOrTrunc(Size, DL, MVT::i32);
+ MVT ValueSizeTy = nvTM->is64Bit() ? MVT::i64 : MVT::i32;
- SDValue AllocOps[] = {Chain, Size,
+ SDValue AllocOps[] = {Chain, DAG.getZExtOrTrunc(Size, DL, ValueSizeTy),
DAG.getTargetConstant(Align, DL, MVT::i32)};
- SDValue Alloca = DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL,
- nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps);
-
- SDValue MergeOps[] = {Alloca, Chain};
- return DAG.getMergeValues(MergeOps, DL);
+ EVT RetTypes[] = {ValueSizeTy, MVT::Other};
+ return DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL, RetTypes, AllocOps);
}
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
diff --git a/llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll b/llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll
new file mode 100644
index 0000000000000..16255893b1c3d
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+define void @foo(i64 %a, ptr %p0, ptr %p1) {
+; CHECK-LABEL: foo(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
+; CHECK-NEXT: add.s64 %rd2, %rd1, 7;
+; CHECK-NEXT: and.b64 %rd3, %rd2, -8;
+; CHECK-NEXT: alloca.u64 %rd4, %rd3, 16;
+; CHECK-NEXT: cvta.local.u64 %rd4, %rd4;
+; CHECK-NEXT: ld.param.u64 %rd5, [foo_param_1];
+; CHECK-NEXT: alloca.u64 %rd6, %rd3, 16;
+; CHECK-NEXT: cvta.local.u64 %rd6, %rd6;
+; CHECK-NEXT: ld.param.u64 %rd7, [foo_param_2];
+; CHECK-NEXT: st.u64 [%rd5], %rd4;
+; CHECK-NEXT: st.u64 [%rd7], %rd6;
+; CHECK-NEXT: ret;
+ %b = alloca i8, i64 %a, align 16
+ %c = alloca i8, i64 %a, align 16
+ store ptr %b, ptr %p0, align 8
+ store ptr %c, ptr %p1, align 8
+ ret void
+}
More information about the llvm-commits mailing list.