[llvm] [NVPTX] TableGen-erate SDNode descriptions (PR #168367)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 05:44:02 PST 2025
https://github.com/s-barannikov created https://github.com/llvm/llvm-project/pull/168367
This allows SDNodes to be validated against their expected type profiles
and reduces the number of changes required to add a new node.
The verification functionality detected a few issues, two of them were fixed (missing `SDNPMemOperand` property on `TCGEN05_MMA` nodes and extra glue operand/result on `CallPrototype`), the one remaining is with `ProxyReg` node, see `NVPTXSelectionDAGInfo::verifyTargetNode()`.
Part of #119709.
>From 8b93ff4245a4c5e066fcfeb72bcea8e3cf955199 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:02:11 +0300
Subject: [PATCH 1/3] NVPTX
---
llvm/lib/Target/NVPTX/CMakeLists.txt | 1 +
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 +
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 106 ++--------------
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 114 ------------------
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2 +-
.../Target/NVPTX/NVPTXSelectionDAGInfo.cpp | 85 ++++++++++++-
llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h | 42 ++++++-
7 files changed, 134 insertions(+), 217 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index 693f0d0b35edc..f9c24750c4836 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -6,6 +6,7 @@ tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM NVPTXGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(NVPTXCommonTableGen)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index d525531766ddf..055f1ff47306d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -17,6 +17,7 @@
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
+#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3e44e47c56ad7..8fc3a68de6c79 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
@@ -1107,97 +1108,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
{MVT::i32, MVT::i128, MVT::v4f32, MVT::Other}, Custom);
}
-const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
-
-#define MAKE_CASE(V) \
- case V: \
- return #V;
-
- switch ((NVPTXISD::NodeType)Opcode) {
- case NVPTXISD::FIRST_NUMBER:
- break;
-
- MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
- MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
- MAKE_CASE(NVPTXISD::RET_GLUE)
- MAKE_CASE(NVPTXISD::DeclareArrayParam)
- MAKE_CASE(NVPTXISD::DeclareScalarParam)
- MAKE_CASE(NVPTXISD::CALL)
- MAKE_CASE(NVPTXISD::MoveParam)
- MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
- MAKE_CASE(NVPTXISD::BUILD_VECTOR)
- MAKE_CASE(NVPTXISD::CallPrototype)
- MAKE_CASE(NVPTXISD::ProxyReg)
- MAKE_CASE(NVPTXISD::LoadV2)
- MAKE_CASE(NVPTXISD::LoadV4)
- MAKE_CASE(NVPTXISD::LoadV8)
- MAKE_CASE(NVPTXISD::LDUV2)
- MAKE_CASE(NVPTXISD::LDUV4)
- MAKE_CASE(NVPTXISD::StoreV2)
- MAKE_CASE(NVPTXISD::StoreV4)
- MAKE_CASE(NVPTXISD::StoreV8)
- MAKE_CASE(NVPTXISD::FSHL_CLAMP)
- MAKE_CASE(NVPTXISD::FSHR_CLAMP)
- MAKE_CASE(NVPTXISD::BFI)
- MAKE_CASE(NVPTXISD::PRMT)
- MAKE_CASE(NVPTXISD::FCOPYSIGN)
- MAKE_CASE(NVPTXISD::FMAXNUM3)
- MAKE_CASE(NVPTXISD::FMINNUM3)
- MAKE_CASE(NVPTXISD::FMAXIMUM3)
- MAKE_CASE(NVPTXISD::FMINIMUM3)
- MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
- MAKE_CASE(NVPTXISD::STACKRESTORE)
- MAKE_CASE(NVPTXISD::STACKSAVE)
- MAKE_CASE(NVPTXISD::SETP_F16X2)
- MAKE_CASE(NVPTXISD::SETP_BF16X2)
- MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
- MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
- MAKE_CASE(NVPTXISD::BrxEnd)
- MAKE_CASE(NVPTXISD::BrxItem)
- MAKE_CASE(NVPTXISD::BrxStart)
- MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED)
- MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X)
- MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y)
- MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
- MAKE_CASE(
- NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
- MAKE_CASE(
- NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(
- NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
- MAKE_CASE(
- NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
- MAKE_CASE(NVPTXISD::CVT_E4M3X4_F32X4_RS_SF)
- MAKE_CASE(NVPTXISD::CVT_E5M2X4_F32X4_RS_SF)
- MAKE_CASE(NVPTXISD::CVT_E2M3X4_F32X4_RS_SF)
- MAKE_CASE(NVPTXISD::CVT_E3M2X4_F32X4_RS_SF)
- MAKE_CASE(NVPTXISD::CVT_E2M1X4_F32X4_RS_SF)
- }
- return nullptr;
-
-#undef MAKE_CASE
-}
-
TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
@@ -2032,7 +1942,7 @@ static ISD::NodeType getScalarOpcodeForReduction(unsigned ReductionOpcode) {
}
/// Get 3-input scalar reduction opcode
-static std::optional<NVPTXISD::NodeType>
+static std::optional<unsigned>
getScalar3OpcodeForReduction(unsigned ReductionOpcode) {
switch (ReductionOpcode) {
case ISD::VECREDUCE_FMAX:
@@ -2931,7 +2841,7 @@ static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
using NVPTX::PTXCvtMode::CvtMode;
auto [OpCode, RetTy, CvtModeFlag] =
- [&]() -> std::tuple<NVPTXISD::NodeType, MVT::SimpleValueType, uint32_t> {
+ [&]() -> std::tuple<unsigned, MVT::SimpleValueType, uint32_t> {
switch (IntrinsicID) {
case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8,
@@ -3314,7 +3224,7 @@ SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
// Generate BrxEnd nodes
SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
IdV, Chain.getValue(1)};
- SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps);
+ SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);
return BrxEnd;
}
@@ -5457,7 +5367,7 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDLoc DL(LD);
// the new opcode after we double the number of operands
- NVPTXISD::NodeType Opcode;
+ unsigned Opcode;
SmallVector<SDValue> Operands(LD->ops());
unsigned OldNumOutputs; // non-glue, non-chain outputs
switch (LD->getOpcode()) {
@@ -5540,7 +5450,7 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
auto *ST = cast<MemSDNode>(N);
// The new opcode after we double the number of operands.
- NVPTXISD::NodeType Opcode;
+ unsigned Opcode;
switch (N->getOpcode()) {
case ISD::STORE:
// Any packed type is legal, so the legalizer will not have lowered
@@ -5675,7 +5585,7 @@ static SDValue PerformFADDCombine(SDNode *N,
}
/// Get 3-input version of a 2-input min/max opcode
-static NVPTXISD::NodeType getMinMax3Opcode(unsigned MinMax2Opcode) {
+static unsigned getMinMax3Opcode(unsigned MinMax2Opcode) {
switch (MinMax2Opcode) {
case ISD::FMAXNUM:
case ISD::FMAXIMUMNUM:
@@ -5706,7 +5616,7 @@ static SDValue PerformFMinMaxCombine(SDNode *N,
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
unsigned MinMaxOp2 = N->getOpcode();
- NVPTXISD::NodeType MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
+ unsigned MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
if (Op0.getOpcode() == MinMaxOp2 && Op0.hasOneUse()) {
// (maxnum (maxnum a, b), c) -> (maxnum3 a, b, c)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 63fa0bb9159ff..d71a86fd463f6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -20,118 +20,6 @@
#include "llvm/Support/AtomicOrdering.h"
namespace llvm {
-namespace NVPTXISD {
-enum NodeType : unsigned {
- // Start the numbering from where ISD NodeType finishes.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- RET_GLUE,
-
- /// These nodes represent a parameter declaration. In PTX this will look like:
- /// .param .align 16 .b8 param0[1024];
- /// .param .b32 retval0;
- ///
- /// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
- /// DeclareScalarParam(Chain, Externalsym, Size, Glue)
- DeclareScalarParam,
- DeclareArrayParam,
-
- /// This node represents a PTX call instruction. It's operands are as follows:
- ///
- /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
- /// NumParams, Callee, Proto)
- CALL,
-
- MoveParam,
- CallPrototype,
- ProxyReg,
- FSHL_CLAMP,
- FSHR_CLAMP,
- MUL_WIDE_SIGNED,
- MUL_WIDE_UNSIGNED,
- SETP_F16X2,
- SETP_BF16X2,
- BFI,
- PRMT,
-
- /// This node is similar to ISD::BUILD_VECTOR except that the output may be
- /// implicitly bitcast to a scalar. This allows for the representation of
- /// packing move instructions for vector types which are not legal i.e. v2i32
- BUILD_VECTOR,
-
- /// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value
- /// which may be a scalar and unpacks it into multiple values by implicitly
- /// converting it to a vector.
- UNPACK_VECTOR,
-
- FCOPYSIGN,
- FMAXNUM3,
- FMINNUM3,
- FMAXIMUM3,
- FMINIMUM3,
-
- DYNAMIC_STACKALLOC,
- STACKRESTORE,
- STACKSAVE,
- BrxStart,
- BrxItem,
- BrxEnd,
- CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED,
- CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
- CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
- CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
- CVT_E4M3X4_F32X4_RS_SF,
- CVT_E5M2X4_F32X4_RS_SF,
- CVT_E2M3X4_F32X4_RS_SF,
- CVT_E3M2X4_F32X4_RS_SF,
- CVT_E2M1X4_F32X4_RS_SF,
-
- FIRST_MEMORY_OPCODE,
-
- /// These nodes are used to lower atomic instructions with i128 type. They are
- /// similar to the generic nodes, but the input and output values are split
- /// into two 64-bit values.
- /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
- /// swapLo, swapHi)
- /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
- ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
- ATOMIC_SWAP_B128,
-
- LoadV2,
- LoadV4,
- LoadV8,
- LDUV2, // LDU.v2
- LDUV4, // LDU.v4
- StoreV2,
- StoreV4,
- StoreV8,
- TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
- TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
- TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
- TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
- TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
- TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
- LAST_MEMORY_OPCODE =
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-};
-}
class NVPTXSubtarget;
@@ -144,8 +32,6 @@ class NVPTXTargetLowering : public TargetLowering {
const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- const char *getTargetNodeName(unsigned Opcode) const override;
-
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index f0bdf472b96ed..260d0d5494869 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1754,7 +1754,7 @@ def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype :
SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPSideEffect]>;
def ProtoIdent : Operand<i32> { let PrintMethod = "printProtoIdent"; }
def CALL_PROTOTYPE :
NVPTXInst<(outs), (ins ProtoIdent:$ident),
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
index d2035c6f8166f..05af7dac3fd78 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
@@ -7,13 +7,92 @@
//===----------------------------------------------------------------------===//
#include "NVPTXSelectionDAGInfo.h"
-#include "NVPTXISelLowering.h"
+
+#define GET_SDNODE_DESC
+#include "NVPTXGenSDNodeInfo.inc"
using namespace llvm;
+NVPTXSelectionDAGInfo::NVPTXSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(NVPTXGenSDNodeInfo) {}
+
NVPTXSelectionDAGInfo::~NVPTXSelectionDAGInfo() = default;
+const char *NVPTXSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V) \
+ case V: \
+ return #V;
+
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<NVPTXISD::NodeType>(Opcode)) {
+ MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
+ MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
+ MAKE_CASE(NVPTXISD::LoadV2)
+ MAKE_CASE(NVPTXISD::LoadV4)
+ MAKE_CASE(NVPTXISD::LoadV8)
+ MAKE_CASE(NVPTXISD::LDUV2)
+ MAKE_CASE(NVPTXISD::LDUV4)
+ MAKE_CASE(NVPTXISD::StoreV2)
+ MAKE_CASE(NVPTXISD::StoreV4)
+ MAKE_CASE(NVPTXISD::StoreV8)
+ MAKE_CASE(NVPTXISD::SETP_F16X2)
+ MAKE_CASE(NVPTXISD::SETP_BF16X2)
+ MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
+ }
+#undef MAKE_CASE
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
bool NVPTXSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE &&
- Opcode <= NVPTXISD::LAST_MEMORY_OPCODE;
+ // These nodes don't have corresponding entries in *.td files.
+ if (Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE &&
+ Opcode <= NVPTXISD::LAST_MEMORY_OPCODE)
+ return true;
+
+ // These nodes lack SDNPMemOperand property.
+ switch (Opcode) {
+ default:
+ break;
+ case NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
+ return true;
+ }
+
+ return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
+}
+
+void NVPTXSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case NVPTXISD::ProxyReg:
+ // invalid number of results; expected 2, got 1
+ return;
+ }
+
+ return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
index 9d69f48026c79..07c130baeaa4f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
@@ -11,13 +11,53 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "NVPTXGenSDNodeInfo.inc"
+
namespace llvm {
+namespace NVPTXISD {
+
+enum NodeType : unsigned {
+ SETP_F16X2 = GENERATED_OPCODE_END,
+ SETP_BF16X2,
+ UNPACK_VECTOR,
+
+ FIRST_MEMORY_OPCODE,
+
+ /// These nodes are used to lower atomic instructions with i128 type. They are
+ /// similar to the generic nodes, but the input and output values are split
+ /// into two 64-bit values.
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
+ /// swapLo, swapHi)
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
+ ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
+ ATOMIC_SWAP_B128,
+
+ LoadV2,
+ LoadV4,
+ LoadV8,
+ LDUV2, // LDU.v2
+ LDUV4, // LDU.v4
+ StoreV2,
+ StoreV4,
+ StoreV8,
+ LAST_MEMORY_OPCODE = StoreV8,
+};
+
+} // namespace NVPTXISD
-class NVPTXSelectionDAGInfo : public SelectionDAGTargetInfo {
+class NVPTXSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ NVPTXSelectionDAGInfo();
+
~NVPTXSelectionDAGInfo() override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
bool isTargetMemoryOpcode(unsigned Opcode) const override;
+
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
};
} // namespace llvm
>From ce99836fa29f967e669725e3bc0460612a358d0d Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Mon, 17 Nov 2025 16:34:49 +0300
Subject: [PATCH 2/3] Add SDNPMemOperand to TCGEN05_MMA_* nodes
---
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 +-
.../Target/NVPTX/NVPTXSelectionDAGInfo.cpp | 31 -------------------
2 files changed, 1 insertion(+), 32 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index ea69a54e6db37..bcdb46eca9744 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -5625,7 +5625,7 @@ class Tcgen05MMADisableOutputLaneSDNode<bit Sp, string ASpace,
# "_DISABLE_OUTPUT_LANE_CG" # CtaGroup
# !if(!eq(AShift, 1), "_ASHIFT", ""),
Tcgen05MMADisableOutputLaneTypeProfile<Sp, ASpace, CtaGroup, ScaleInput>,
- [SDNPHasChain, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPSideEffect, SDNPMemOperand]>;
class Tcgen05MMADisableOutputLaneInst<bit Sp, string ASpace,
string Kind, int CtaGroup, string CollectorUsageStr,
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
index 05af7dac3fd78..e8ea1ad6c404d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
@@ -50,37 +50,6 @@ bool NVPTXSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
Opcode <= NVPTXISD::LAST_MEMORY_OPCODE)
return true;
- // These nodes lack SDNPMemOperand property.
- switch (Opcode) {
- default:
- break;
- case NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1:
- case NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2:
- case NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
- case NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
- case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1:
- case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2:
- case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
- case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
- case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
- case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
- case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
- case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
- case NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1:
- case NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2:
- case NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
- case NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
- case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1:
- case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2:
- case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
- case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
- case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
- case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
- case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
- case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
- return true;
- }
-
return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
}
>From 59feabd2fbf376dcd16210dfe1b40583ca783239 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Mon, 17 Nov 2025 16:39:00 +0300
Subject: [PATCH 3/3] Move comments
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 260d0d5494869..b3d046dd05d51 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1686,13 +1686,19 @@ def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]
def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
-
+// These nodes represent a parameter declaration. In PTX this will look like:
+// .param .align 16 .b8 param0[1024];
+// .param .b32 retval0;
+//
+// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
+// DeclareScalarParam(Chain, Externalsym, Size, Glue)
def declare_array_param :
SDNode<"NVPTXISD::DeclareArrayParam", SDTDeclareArrayParam,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def declare_scalar_param :
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+
def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def proxy_reg :
@@ -2181,6 +2187,9 @@ foreach vt = [v2f32, v2i32] in {
def: Pat<(v2i16 (scalar_to_vector i16:$a)),
(CVT_u32_u16 $a, CvtNONE)>;
+// This node is similar to ISD::BUILD_VECTOR except that the output may be
+// implicitly bitcast to a scalar. This allows for the representation of
+// packing move instructions for vector types which are not legal i.e. v2i32
def nvptx_build_vector : SDNode<"NVPTXISD::BUILD_VECTOR", SDTypeProfile<1, 2, []>, []>;
def : Pat<(i64 (nvptx_build_vector i32:$a, i32:$b)),
More information about the llvm-commits
mailing list