[llvm] [NVPTX] Add support for stacksave, stackrestore intrinsics (PR #114484)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 16:14:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
Add support for the '`@<!-- -->llvm.stacksave`' and '`@<!-- -->llvm.stackrestore`' intrinsics to NVPTX. These are implemented with the `stacksave` and `stackrestore` PTX instructions respectively. See [PTX ISA 9.7.17. Stack Manipulation Instructions](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#stack-manipulation-instructions).
---
Full diff: https://github.com/llvm/llvm-project/pull/114484.diff
4 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+57-2)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (+4)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+38)
- (added) llvm/test/CodeGen/NVPTX/stacksaverestore.ll (+83)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 01abf9591e342f..0bc11d0c905430 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -667,8 +668,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, {MVT::i32, MVT::i64}, Custom);
+ setOperationAction({ISD::STACKRESTORE, ISD::STACKSAVE}, MVT::Other, Custom);
// TRAP can be lowered to PTX trap
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -961,6 +962,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::PRMT)
MAKE_CASE(NVPTXISD::FCOPYSIGN)
MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
+ MAKE_CASE(NVPTXISD::STACKRESTORE)
+ MAKE_CASE(NVPTXISD::STACKSAVE)
MAKE_CASE(NVPTXISD::SETP_F16X2)
MAKE_CASE(NVPTXISD::SETP_BF16X2)
MAKE_CASE(NVPTXISD::Dummy)
@@ -2287,6 +2290,54 @@ SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL, RetTypes, AllocOps);
}
+SDValue NVPTXTargetLowering::LowerSTACKRESTORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op.getNode());
+ if (STI.getPTXVersion() < 73 || STI.getSmVersion() < 52) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+
+ DiagnosticInfoUnsupported NoStackRestore(
+ Fn,
+ "Support for stackrestore introduced in PTX ISA version 7.3 and "
+ "requires target sm_52.",
+ DL.getDebugLoc());
+ DAG.getContext()->diagnose(NoStackRestore);
+ return Op.getOperand(0);
+ }
+
+ const MVT LocalVT = getPointerTy(DAG.getDataLayout(), ADDRESS_SPACE_LOCAL);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue ASC = DAG.getAddrSpaceCast(DL, LocalVT, Ptr, ADDRESS_SPACE_GENERIC,
+ ADDRESS_SPACE_LOCAL);
+ return DAG.getNode(NVPTXISD::STACKRESTORE, DL, MVT::Other, {Chain, ASC});
+}
+
+SDValue NVPTXTargetLowering::LowerSTACKSAVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op.getNode());
+ if (STI.getPTXVersion() < 73 || STI.getSmVersion() < 52) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+
+ DiagnosticInfoUnsupported NoStackSave(
+ Fn,
+ "Support for stacksave introduced in PTX ISA version 7.3 and "
+ "requires target sm_52.",
+ DL.getDebugLoc());
+ DAG.getContext()->diagnose(NoStackSave);
+ auto Ops = {DAG.getConstant(0, DL, Op.getValueType()), Op.getOperand(0)};
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+ const MVT LocalVT = getPointerTy(DAG.getDataLayout(), ADDRESS_SPACE_LOCAL);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SS =
+ DAG.getNode(NVPTXISD::STACKSAVE, DL, {LocalVT, MVT::Other}, Chain);
+ SDValue ASC = DAG.getAddrSpaceCast(
+ DL, Op.getValueType(), SS, ADDRESS_SPACE_LOCAL, ADDRESS_SPACE_GENERIC);
+ return DAG.getMergeValues({ASC, SDValue(SS.getNode(), 1)}, DL);
+}
+
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
@@ -2871,6 +2922,10 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerVectorArith(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::STACKRESTORE:
+ return LowerSTACKRESTORE(Op, DAG);
+ case ISD::STACKSAVE:
+ return LowerSTACKSAVE(Op, DAG);
case ISD::CopyToReg:
return LowerCopyToReg_128(Op, DAG);
default:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 824a659671967a..ead9ca4a311ae3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -63,6 +63,8 @@ enum NodeType : unsigned {
PRMT,
FCOPYSIGN,
DYNAMIC_STACKALLOC,
+ STACKRESTORE,
+ STACKSAVE,
BrxStart,
BrxItem,
BrxEnd,
@@ -526,6 +528,8 @@ class NVPTXTargetLowering : public TargetLowering {
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
std::string
getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 1ca3aefb0b0934..2658ca32716378 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3860,6 +3860,44 @@ foreach a_type = ["s", "u"] in {
}
}
+//
+// Stack Manipulation
+//
+
+def SDTStackRestore : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def stackrestore :
+ SDNode<"NVPTXISD::STACKRESTORE", SDTStackRestore,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def stacksave :
+ SDNode<"NVPTXISD::STACKSAVE", SDTIntLeaf,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def STACKRESTORE_32 :
+ NVPTXInst<(outs), (ins Int32Regs:$ptr),
+ "stackrestore.u32 \t$ptr;",
+ [(stackrestore (i32 Int32Regs:$ptr))]>,
+ Requires<[hasPTX<73>, hasSM<52>]>;
+
+def STACKSAVE_32 :
+ NVPTXInst<(outs Int32Regs:$dst), (ins),
+ "stacksave.u32 \t$dst;",
+ [(set Int32Regs:$dst, (i32 stacksave))]>,
+ Requires<[hasPTX<73>, hasSM<52>]>;
+
+def STACKRESTORE_64 :
+ NVPTXInst<(outs), (ins Int64Regs:$ptr),
+ "stackrestore.u64 \t$ptr;",
+ [(stackrestore (i64 Int64Regs:$ptr))]>,
+ Requires<[hasPTX<73>, hasSM<52>]>;
+
+def STACKSAVE_64 :
+ NVPTXInst<(outs Int64Regs:$dst), (ins),
+ "stacksave.u64 \t$dst;",
+ [(set Int64Regs:$dst, (i64 stacksave))]>,
+ Requires<[hasPTX<73>, hasSM<52>]>;
+
include "NVPTXIntrinsics.td"
//-----------------------------------
diff --git a/llvm/test/CodeGen/NVPTX/stacksaverestore.ll b/llvm/test/CodeGen/NVPTX/stacksaverestore.ll
new file mode 100644
index 00000000000000..f5a057fcb483c4
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/stacksaverestore.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx -mcpu=sm_60 -mattr=+ptx73 | FileCheck %s --check-prefix=CHECK-32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx73 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=nvptx64 -nvptx-short-ptr -mcpu=sm_60 -mattr=+ptx73 | FileCheck %s --check-prefix=CHECK-MIXED
+; RUN: %if ptxas && ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx73 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define ptr @test_save() {
+; CHECK-32-LABEL: test_save(
+; CHECK-32: {
+; CHECK-32-NEXT: .reg .b32 %r<3>;
+; CHECK-32-EMPTY:
+; CHECK-32-NEXT: // %bb.0:
+; CHECK-32-NEXT: stacksave.u32 %r1;
+; CHECK-32-NEXT: cvta.local.u32 %r2, %r1;
+; CHECK-32-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-32-NEXT: ret;
+;
+; CHECK-64-LABEL: test_save(
+; CHECK-64: {
+; CHECK-64-NEXT: .reg .b64 %rd<3>;
+; CHECK-64-EMPTY:
+; CHECK-64-NEXT: // %bb.0:
+; CHECK-64-NEXT: stacksave.u64 %rd1;
+; CHECK-64-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-64-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-64-NEXT: ret;
+;
+; CHECK-MIXED-LABEL: test_save(
+; CHECK-MIXED: {
+; CHECK-MIXED-NEXT: .reg .b32 %r<2>;
+; CHECK-MIXED-NEXT: .reg .b64 %rd<3>;
+; CHECK-MIXED-EMPTY:
+; CHECK-MIXED-NEXT: // %bb.0:
+; CHECK-MIXED-NEXT: stacksave.u32 %r1;
+; CHECK-MIXED-NEXT: cvt.u64.u32 %rd1, %r1;
+; CHECK-MIXED-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-MIXED-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-MIXED-NEXT: ret;
+ %1 = call ptr @llvm.stacksave()
+ ret ptr %1
+}
+
+
+define void @test_restore(ptr %p) {
+; CHECK-32-LABEL: test_restore(
+; CHECK-32: {
+; CHECK-32-NEXT: .reg .b32 %r<3>;
+; CHECK-32-EMPTY:
+; CHECK-32-NEXT: // %bb.0:
+; CHECK-32-NEXT: ld.param.u32 %r1, [test_restore_param_0];
+; CHECK-32-NEXT: cvta.to.local.u32 %r2, %r1;
+; CHECK-32-NEXT: stackrestore.u32 %r2;
+; CHECK-32-NEXT: ret;
+;
+; CHECK-64-LABEL: test_restore(
+; CHECK-64: {
+; CHECK-64-NEXT: .reg .b64 %rd<3>;
+; CHECK-64-EMPTY:
+; CHECK-64-NEXT: // %bb.0:
+; CHECK-64-NEXT: ld.param.u64 %rd1, [test_restore_param_0];
+; CHECK-64-NEXT: cvta.to.local.u64 %rd2, %rd1;
+; CHECK-64-NEXT: stackrestore.u64 %rd2;
+; CHECK-64-NEXT: ret;
+;
+; CHECK-MIXED-LABEL: test_restore(
+; CHECK-MIXED: {
+; CHECK-MIXED-NEXT: .reg .b32 %r<2>;
+; CHECK-MIXED-NEXT: .reg .b64 %rd<3>;
+; CHECK-MIXED-EMPTY:
+; CHECK-MIXED-NEXT: // %bb.0:
+; CHECK-MIXED-NEXT: ld.param.u64 %rd1, [test_restore_param_0];
+; CHECK-MIXED-NEXT: cvta.to.local.u64 %rd2, %rd1;
+; CHECK-MIXED-NEXT: cvt.u32.u64 %r1, %rd2;
+; CHECK-MIXED-NEXT: stackrestore.u32 %r1;
+; CHECK-MIXED-NEXT: ret;
+ call void @llvm.stackrestore(ptr %p)
+ ret void
+}
+
+declare ptr @llvm.stacksave()
+declare void @llvm.stackrestore(ptr)
``````````
</details>
https://github.com/llvm/llvm-project/pull/114484
More information about the llvm-commits
mailing list