[llvm] [RFC] TableGen-erate SDNode descriptions (PR #119709)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 16 16:17:35 PST 2025
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/119709
>From c583cef53fa01915ba6316ca7a5077b68983a37d Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:00:59 +0300
Subject: [PATCH 1/9] ARM
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 220 +-----------
llvm/lib/Target/ARM/ARMISelLowering.h | 315 ------------------
llvm/lib/Target/ARM/ARMInstrInfo.td | 2 +-
llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 90 ++++-
llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 75 ++++-
llvm/lib/Target/ARM/CMakeLists.txt | 1 +
.../Target/ARM/ARMSelectionDAGTest.cpp | 2 +-
7 files changed, 167 insertions(+), 538 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f28640ce7b107..cd8d7a0bee5e3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1556,220 +1556,6 @@ ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
return std::make_pair(RRC, Cost);
}
-const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
-#define MAKE_CASE(V) \
- case V: \
- return #V;
- switch ((ARMISD::NodeType)Opcode) {
- case ARMISD::FIRST_NUMBER:
- break;
- MAKE_CASE(ARMISD::Wrapper)
- MAKE_CASE(ARMISD::WrapperPIC)
- MAKE_CASE(ARMISD::WrapperJT)
- MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL)
- MAKE_CASE(ARMISD::CALL)
- MAKE_CASE(ARMISD::CALL_PRED)
- MAKE_CASE(ARMISD::CALL_NOLINK)
- MAKE_CASE(ARMISD::tSECALL)
- MAKE_CASE(ARMISD::t2CALL_BTI)
- MAKE_CASE(ARMISD::BRCOND)
- MAKE_CASE(ARMISD::BR_JT)
- MAKE_CASE(ARMISD::BR2_JT)
- MAKE_CASE(ARMISD::RET_GLUE)
- MAKE_CASE(ARMISD::SERET_GLUE)
- MAKE_CASE(ARMISD::INTRET_GLUE)
- MAKE_CASE(ARMISD::PIC_ADD)
- MAKE_CASE(ARMISD::CMP)
- MAKE_CASE(ARMISD::CMN)
- MAKE_CASE(ARMISD::CMPZ)
- MAKE_CASE(ARMISD::CMPFP)
- MAKE_CASE(ARMISD::CMPFPE)
- MAKE_CASE(ARMISD::CMPFPw0)
- MAKE_CASE(ARMISD::CMPFPEw0)
- MAKE_CASE(ARMISD::BCC_i64)
- MAKE_CASE(ARMISD::FMSTAT)
- MAKE_CASE(ARMISD::CMOV)
- MAKE_CASE(ARMISD::SSAT)
- MAKE_CASE(ARMISD::USAT)
- MAKE_CASE(ARMISD::ASRL)
- MAKE_CASE(ARMISD::LSRL)
- MAKE_CASE(ARMISD::LSLL)
- MAKE_CASE(ARMISD::LSLS)
- MAKE_CASE(ARMISD::LSRS1)
- MAKE_CASE(ARMISD::ASRS1)
- MAKE_CASE(ARMISD::RRX)
- MAKE_CASE(ARMISD::ADDC)
- MAKE_CASE(ARMISD::ADDE)
- MAKE_CASE(ARMISD::SUBC)
- MAKE_CASE(ARMISD::SUBE)
- MAKE_CASE(ARMISD::VMOVRRD)
- MAKE_CASE(ARMISD::VMOVDRR)
- MAKE_CASE(ARMISD::VMOVhr)
- MAKE_CASE(ARMISD::VMOVrh)
- MAKE_CASE(ARMISD::VMOVSR)
- MAKE_CASE(ARMISD::EH_SJLJ_SETJMP)
- MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP)
- MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH)
- MAKE_CASE(ARMISD::TC_RETURN)
- MAKE_CASE(ARMISD::THREAD_POINTER)
- MAKE_CASE(ARMISD::DYN_ALLOC)
- MAKE_CASE(ARMISD::MEMBARRIER_MCR)
- MAKE_CASE(ARMISD::PRELOAD)
- MAKE_CASE(ARMISD::LDRD)
- MAKE_CASE(ARMISD::STRD)
- MAKE_CASE(ARMISD::WIN__CHKSTK)
- MAKE_CASE(ARMISD::WIN__DBZCHK)
- MAKE_CASE(ARMISD::PREDICATE_CAST)
- MAKE_CASE(ARMISD::VECTOR_REG_CAST)
- MAKE_CASE(ARMISD::MVESEXT)
- MAKE_CASE(ARMISD::MVEZEXT)
- MAKE_CASE(ARMISD::MVETRUNC)
- MAKE_CASE(ARMISD::VCMP)
- MAKE_CASE(ARMISD::VCMPZ)
- MAKE_CASE(ARMISD::VTST)
- MAKE_CASE(ARMISD::VSHLs)
- MAKE_CASE(ARMISD::VSHLu)
- MAKE_CASE(ARMISD::VSHLIMM)
- MAKE_CASE(ARMISD::VSHRsIMM)
- MAKE_CASE(ARMISD::VSHRuIMM)
- MAKE_CASE(ARMISD::VRSHRsIMM)
- MAKE_CASE(ARMISD::VRSHRuIMM)
- MAKE_CASE(ARMISD::VRSHRNIMM)
- MAKE_CASE(ARMISD::VQSHLsIMM)
- MAKE_CASE(ARMISD::VQSHLuIMM)
- MAKE_CASE(ARMISD::VQSHLsuIMM)
- MAKE_CASE(ARMISD::VQSHRNsIMM)
- MAKE_CASE(ARMISD::VQSHRNuIMM)
- MAKE_CASE(ARMISD::VQSHRNsuIMM)
- MAKE_CASE(ARMISD::VQRSHRNsIMM)
- MAKE_CASE(ARMISD::VQRSHRNuIMM)
- MAKE_CASE(ARMISD::VQRSHRNsuIMM)
- MAKE_CASE(ARMISD::VSLIIMM)
- MAKE_CASE(ARMISD::VSRIIMM)
- MAKE_CASE(ARMISD::VGETLANEu)
- MAKE_CASE(ARMISD::VGETLANEs)
- MAKE_CASE(ARMISD::VMOVIMM)
- MAKE_CASE(ARMISD::VMVNIMM)
- MAKE_CASE(ARMISD::VMOVFPIMM)
- MAKE_CASE(ARMISD::VDUP)
- MAKE_CASE(ARMISD::VDUPLANE)
- MAKE_CASE(ARMISD::VEXT)
- MAKE_CASE(ARMISD::VREV64)
- MAKE_CASE(ARMISD::VREV32)
- MAKE_CASE(ARMISD::VREV16)
- MAKE_CASE(ARMISD::VZIP)
- MAKE_CASE(ARMISD::VUZP)
- MAKE_CASE(ARMISD::VTRN)
- MAKE_CASE(ARMISD::VTBL1)
- MAKE_CASE(ARMISD::VTBL2)
- MAKE_CASE(ARMISD::VMOVN)
- MAKE_CASE(ARMISD::VQMOVNs)
- MAKE_CASE(ARMISD::VQMOVNu)
- MAKE_CASE(ARMISD::VCVTN)
- MAKE_CASE(ARMISD::VCVTL)
- MAKE_CASE(ARMISD::VIDUP)
- MAKE_CASE(ARMISD::VMULLs)
- MAKE_CASE(ARMISD::VMULLu)
- MAKE_CASE(ARMISD::VQDMULH)
- MAKE_CASE(ARMISD::VADDVs)
- MAKE_CASE(ARMISD::VADDVu)
- MAKE_CASE(ARMISD::VADDVps)
- MAKE_CASE(ARMISD::VADDVpu)
- MAKE_CASE(ARMISD::VADDLVs)
- MAKE_CASE(ARMISD::VADDLVu)
- MAKE_CASE(ARMISD::VADDLVAs)
- MAKE_CASE(ARMISD::VADDLVAu)
- MAKE_CASE(ARMISD::VADDLVps)
- MAKE_CASE(ARMISD::VADDLVpu)
- MAKE_CASE(ARMISD::VADDLVAps)
- MAKE_CASE(ARMISD::VADDLVApu)
- MAKE_CASE(ARMISD::VMLAVs)
- MAKE_CASE(ARMISD::VMLAVu)
- MAKE_CASE(ARMISD::VMLAVps)
- MAKE_CASE(ARMISD::VMLAVpu)
- MAKE_CASE(ARMISD::VMLALVs)
- MAKE_CASE(ARMISD::VMLALVu)
- MAKE_CASE(ARMISD::VMLALVps)
- MAKE_CASE(ARMISD::VMLALVpu)
- MAKE_CASE(ARMISD::VMLALVAs)
- MAKE_CASE(ARMISD::VMLALVAu)
- MAKE_CASE(ARMISD::VMLALVAps)
- MAKE_CASE(ARMISD::VMLALVApu)
- MAKE_CASE(ARMISD::VMINVu)
- MAKE_CASE(ARMISD::VMINVs)
- MAKE_CASE(ARMISD::VMAXVu)
- MAKE_CASE(ARMISD::VMAXVs)
- MAKE_CASE(ARMISD::UMAAL)
- MAKE_CASE(ARMISD::UMLAL)
- MAKE_CASE(ARMISD::SMLAL)
- MAKE_CASE(ARMISD::SMLALBB)
- MAKE_CASE(ARMISD::SMLALBT)
- MAKE_CASE(ARMISD::SMLALTB)
- MAKE_CASE(ARMISD::SMLALTT)
- MAKE_CASE(ARMISD::SMULWB)
- MAKE_CASE(ARMISD::SMULWT)
- MAKE_CASE(ARMISD::SMLALD)
- MAKE_CASE(ARMISD::SMLALDX)
- MAKE_CASE(ARMISD::SMLSLD)
- MAKE_CASE(ARMISD::SMLSLDX)
- MAKE_CASE(ARMISD::SMMLAR)
- MAKE_CASE(ARMISD::SMMLSR)
- MAKE_CASE(ARMISD::QADD16b)
- MAKE_CASE(ARMISD::QSUB16b)
- MAKE_CASE(ARMISD::QADD8b)
- MAKE_CASE(ARMISD::QSUB8b)
- MAKE_CASE(ARMISD::UQADD16b)
- MAKE_CASE(ARMISD::UQSUB16b)
- MAKE_CASE(ARMISD::UQADD8b)
- MAKE_CASE(ARMISD::UQSUB8b)
- MAKE_CASE(ARMISD::BUILD_VECTOR)
- MAKE_CASE(ARMISD::BFI)
- MAKE_CASE(ARMISD::VORRIMM)
- MAKE_CASE(ARMISD::VBICIMM)
- MAKE_CASE(ARMISD::VBSP)
- MAKE_CASE(ARMISD::MEMCPY)
- MAKE_CASE(ARMISD::VLD1DUP)
- MAKE_CASE(ARMISD::VLD2DUP)
- MAKE_CASE(ARMISD::VLD3DUP)
- MAKE_CASE(ARMISD::VLD4DUP)
- MAKE_CASE(ARMISD::VLD1_UPD)
- MAKE_CASE(ARMISD::VLD2_UPD)
- MAKE_CASE(ARMISD::VLD3_UPD)
- MAKE_CASE(ARMISD::VLD4_UPD)
- MAKE_CASE(ARMISD::VLD1x2_UPD)
- MAKE_CASE(ARMISD::VLD1x3_UPD)
- MAKE_CASE(ARMISD::VLD1x4_UPD)
- MAKE_CASE(ARMISD::VLD2LN_UPD)
- MAKE_CASE(ARMISD::VLD3LN_UPD)
- MAKE_CASE(ARMISD::VLD4LN_UPD)
- MAKE_CASE(ARMISD::VLD1DUP_UPD)
- MAKE_CASE(ARMISD::VLD2DUP_UPD)
- MAKE_CASE(ARMISD::VLD3DUP_UPD)
- MAKE_CASE(ARMISD::VLD4DUP_UPD)
- MAKE_CASE(ARMISD::VST1_UPD)
- MAKE_CASE(ARMISD::VST2_UPD)
- MAKE_CASE(ARMISD::VST3_UPD)
- MAKE_CASE(ARMISD::VST4_UPD)
- MAKE_CASE(ARMISD::VST1x2_UPD)
- MAKE_CASE(ARMISD::VST1x3_UPD)
- MAKE_CASE(ARMISD::VST1x4_UPD)
- MAKE_CASE(ARMISD::VST2LN_UPD)
- MAKE_CASE(ARMISD::VST3LN_UPD)
- MAKE_CASE(ARMISD::VST4LN_UPD)
- MAKE_CASE(ARMISD::WLS)
- MAKE_CASE(ARMISD::WLSSETUP)
- MAKE_CASE(ARMISD::LE)
- MAKE_CASE(ARMISD::LOOP_DEC)
- MAKE_CASE(ARMISD::CSINV)
- MAKE_CASE(ARMISD::CSNEG)
- MAKE_CASE(ARMISD::CSINC)
- MAKE_CASE(ARMISD::MEMCPYLOOP)
- MAKE_CASE(ARMISD::MEMSETLOOP)
-#undef MAKE_CASE
- }
- return nullptr;
-}
-
EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
@@ -3344,8 +3130,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return LowerInterruptReturn(RetOps, dl, DAG);
}
- ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
- ARMISD::RET_GLUE;
+ unsigned RetNode =
+ AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : ARMISD::RET_GLUE;
return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
}
@@ -4861,7 +4647,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
- ARMISD::NodeType CompareType;
+ unsigned CompareType;
switch (CondCode) {
default:
CompareType = ARMISD::CMP;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index bc2fec3c1bdb5..8191eb40a712a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -51,319 +51,6 @@ class TargetMachine;
class TargetRegisterInfo;
class VectorType;
- namespace ARMISD {
-
- // ARM Specific DAG Nodes
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
- // TargetExternalSymbol, and TargetGlobalAddress.
- WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
- // PIC mode.
- WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
-
- // Add pseudo op to model memcpy for struct byval.
- COPY_STRUCT_BYVAL,
-
- CALL, // Function call.
- CALL_PRED, // Function call that's predicable.
- CALL_NOLINK, // Function call with branch not branch-and-link.
- tSECALL, // CMSE non-secure function call.
- t2CALL_BTI, // Thumb function call followed by BTI instruction.
- BRCOND, // Conditional branch.
- BR_JT, // Jumptable branch.
- BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
- RET_GLUE, // Return with a flag operand.
- SERET_GLUE, // CMSE Entry function return with a flag operand.
- INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand.
-
- PIC_ADD, // Add with a PC operand and a PIC label.
-
- ASRL, // MVE long arithmetic shift right.
- LSRL, // MVE long shift right.
- LSLL, // MVE long shift left.
-
- CMP, // ARM compare instructions.
- CMN, // ARM CMN instructions.
- CMPZ, // ARM compare that sets only Z flag.
- CMPFP, // ARM VFP compare instruction, sets FPSCR.
- CMPFPE, // ARM VFP signalling compare instruction, sets FPSCR.
- CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
- CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets
- // FPSCR.
- FMSTAT, // ARM fmstat instruction.
-
- CMOV, // ARM conditional move instructions.
-
- SSAT, // Signed saturation
- USAT, // Unsigned saturation
-
- BCC_i64,
-
- LSLS, // Flag-setting shift left.
- LSRS1, // Flag-setting logical shift right by one bit.
- ASRS1, // Flag-setting arithmetic shift right by one bit.
- RRX, // Shift right one bit with carry in.
-
- ADDC, // Add with carry
- ADDE, // Add using carry
- SUBC, // Sub with carry
- SUBE, // Sub using carry
-
- VMOVRRD, // double to two gprs.
- VMOVDRR, // Two gprs to double.
- VMOVSR, // move gpr to single, used for f32 literal constructed in a gpr
-
- EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
- EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
- EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
-
- TC_RETURN, // Tail call return pseudo.
-
- THREAD_POINTER,
-
- DYN_ALLOC, // Dynamic allocation on the stack.
-
- MEMBARRIER_MCR, // Memory barrier (MCR)
-
- PRELOAD, // Preload
-
- WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
- WIN__DBZCHK, // Windows' divide by zero check
-
- WLS, // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
- WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
- LOOP_DEC, // Really a part of LE, performs the sub
- LE, // Low-overhead loops, Loop End
-
- PREDICATE_CAST, // Predicate cast for MVE i1 types
- VECTOR_REG_CAST, // Reinterpret the current contents of a vector register
-
- MVESEXT, // Legalization aids for extending a vector into two/four vectors.
- MVEZEXT, // or truncating two/four vectors into one. Eventually becomes
- MVETRUNC, // stack store/load sequence, if not optimized to anything else.
-
- VCMP, // Vector compare.
- VCMPZ, // Vector compare to zero.
- VTST, // Vector test bits.
-
- // Vector shift by vector
- VSHLs, // ...left/right by signed
- VSHLu, // ...left/right by unsigned
-
- // Vector shift by immediate:
- VSHLIMM, // ...left
- VSHRsIMM, // ...right (signed)
- VSHRuIMM, // ...right (unsigned)
-
- // Vector rounding shift by immediate:
- VRSHRsIMM, // ...right (signed)
- VRSHRuIMM, // ...right (unsigned)
- VRSHRNIMM, // ...right narrow
-
- // Vector saturating shift by immediate:
- VQSHLsIMM, // ...left (signed)
- VQSHLuIMM, // ...left (unsigned)
- VQSHLsuIMM, // ...left (signed to unsigned)
- VQSHRNsIMM, // ...right narrow (signed)
- VQSHRNuIMM, // ...right narrow (unsigned)
- VQSHRNsuIMM, // ...right narrow (signed to unsigned)
-
- // Vector saturating rounding shift by immediate:
- VQRSHRNsIMM, // ...right narrow (signed)
- VQRSHRNuIMM, // ...right narrow (unsigned)
- VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
-
- // Vector shift and insert:
- VSLIIMM, // ...left
- VSRIIMM, // ...right
-
- // Vector get lane (VMOV scalar to ARM core register)
- // (These are used for 8- and 16-bit element types only.)
- VGETLANEu, // zero-extend vector extract element
- VGETLANEs, // sign-extend vector extract element
-
- // Vector move immediate and move negated immediate:
- VMOVIMM,
- VMVNIMM,
-
- // Vector move f32 immediate:
- VMOVFPIMM,
-
- // Move H <-> R, clearing top 16 bits
- VMOVrh,
- VMOVhr,
-
- // Vector duplicate:
- VDUP,
- VDUPLANE,
-
- // Vector shuffles:
- VEXT, // extract
- VREV64, // reverse elements within 64-bit doublewords
- VREV32, // reverse elements within 32-bit words
- VREV16, // reverse elements within 16-bit halfwords
- VZIP, // zip (interleave)
- VUZP, // unzip (deinterleave)
- VTRN, // transpose
- VTBL1, // 1-register shuffle with mask
- VTBL2, // 2-register shuffle with mask
- VMOVN, // MVE vmovn
-
- // MVE Saturating truncates
- VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
- VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)
-
- // MVE float <> half converts
- VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top
- // lanes
- VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes
-
- // MVE VIDUP instruction, taking a start value and increment.
- VIDUP,
-
- // Vector multiply long:
- VMULLs, // ...signed
- VMULLu, // ...unsigned
-
- VQDMULH, // MVE vqdmulh instruction
-
- // MVE reductions
- VADDVs, // sign- or zero-extend the elements of a vector to i32,
- VADDVu, // add them all together, and return an i32 of their sum
- VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask
- VADDVpu,
- VADDLVs, // sign- or zero-extend elements to i64 and sum, returning
- VADDLVu, // the low and high 32-bit halves of the sum
- VADDLVAs, // Same as VADDLV[su] but also add an input accumulator
- VADDLVAu, // provided as low and high halves
- VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask
- VADDLVpu,
- VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
- VADDLVApu,
- VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply
- VMLAVu, // them and add the results together, returning an i32 of the sum
- VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask
- VMLAVpu,
- VMLALVs, // Same as VMLAV but with i64, returning the low and
- VMLALVu, // high 32-bit halves of the sum
- VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
- VMLALVpu,
- VMLALVAs, // Same as VMLALV but also add an input accumulator
- VMLALVAu, // provided as low and high halves
- VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
- VMLALVApu,
- VMINVu, // Find minimum unsigned value of a vector and register
- VMINVs, // Find minimum signed value of a vector and register
- VMAXVu, // Find maximum unsigned value of a vector and register
- VMAXVs, // Find maximum signed value of a vector and register
-
- SMULWB, // Signed multiply word by half word, bottom
- SMULWT, // Signed multiply word by half word, top
- UMLAL, // 64bit Unsigned Accumulate Multiply
- SMLAL, // 64bit Signed Accumulate Multiply
- UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
- SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
- SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
- SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
- SMLALTT, // 64-bit signed accumulate multiply top, top 16
- SMLALD, // Signed multiply accumulate long dual
- SMLALDX, // Signed multiply accumulate long dual exchange
- SMLSLD, // Signed multiply subtract long dual
- SMLSLDX, // Signed multiply subtract long dual exchange
- SMMLAR, // Signed multiply long, round and add
- SMMLSR, // Signed multiply long, subtract and round
-
- // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
- // stands for.
- QADD8b,
- QSUB8b,
- QADD16b,
- QSUB16b,
- UQADD8b,
- UQSUB8b,
- UQADD16b,
- UQSUB16b,
-
- // Operands of the standard BUILD_VECTOR node are not legalized, which
- // is fine if BUILD_VECTORs are always lowered to shuffles or other
- // operations, but for ARM some BUILD_VECTORs are legal as-is and their
- // operands need to be legalized. Define an ARM-specific version of
- // BUILD_VECTOR for this purpose.
- BUILD_VECTOR,
-
- // Bit-field insert
- BFI,
-
- // Vector OR with immediate
- VORRIMM,
- // Vector AND with NOT of immediate
- VBICIMM,
-
- // Pseudo vector bitwise select
- VBSP,
-
- // Pseudo-instruction representing a memory copy using ldm/stm
- // instructions.
- MEMCPY,
-
- // Pseudo-instruction representing a memory copy using a tail predicated
- // loop
- MEMCPYLOOP,
- // Pseudo-instruction representing a memset using a tail predicated
- // loop
- MEMSETLOOP,
-
- // V8.1MMainline condition select
- CSINV, // Conditional select invert.
- CSNEG, // Conditional select negate.
- CSINC, // Conditional select increment.
-
- // Vector load N-element structure to all lanes:
- FIRST_MEMORY_OPCODE,
- VLD1DUP = FIRST_MEMORY_OPCODE,
- VLD2DUP,
- VLD3DUP,
- VLD4DUP,
-
- // NEON loads with post-increment base updates:
- VLD1_UPD,
- VLD2_UPD,
- VLD3_UPD,
- VLD4_UPD,
- VLD2LN_UPD,
- VLD3LN_UPD,
- VLD4LN_UPD,
- VLD1DUP_UPD,
- VLD2DUP_UPD,
- VLD3DUP_UPD,
- VLD4DUP_UPD,
- VLD1x2_UPD,
- VLD1x3_UPD,
- VLD1x4_UPD,
-
- // NEON stores with post-increment base updates:
- VST1_UPD,
- VST2_UPD,
- VST3_UPD,
- VST4_UPD,
- VST2LN_UPD,
- VST3LN_UPD,
- VST4LN_UPD,
- VST1x2_UPD,
- VST1x3_UPD,
- VST1x4_UPD,
-
- // Load/Store of dual registers
- LDRD,
- STRD,
- LAST_MEMORY_OPCODE = STRD,
- };
-
- } // end namespace ARMISD
-
namespace ARM {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPSCR.
@@ -427,8 +114,6 @@ class VectorType;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const override;
- const char *getTargetNodeName(unsigned Opcode) const override;
-
bool isSelectSupported(SelectSupportKind Kind) const override {
// ARM does not support scalar condition selects on vectors.
return (Kind != ScalarCondVectorVal);
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index f7176a65d8163..9fb3d4a9d2f51 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -40,7 +40,7 @@ def SDT_ARMCMov : SDTypeProfile<1, 4, [
SDTCisVT<4, FlagsVT>, // in flags
]>;
-def SDT_ARMBrcond : SDTypeProfile<0, 2, [
+def SDT_ARMBrcond : SDTypeProfile<0, 3, [
SDTCisVT<0, OtherVT>, // target basic block
SDTCisVT<1, CondCodeVT>, // condition code
SDTCisVT<2, FlagsVT>, // in flags
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index bf7c962f02efc..989a94cf38bc3 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -10,9 +10,14 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMSelectionDAGInfo.h"
#include "ARMTargetTransformInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/CommandLine.h"
+
+#define GET_SDNODE_DESC
+#include "ARMGenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "arm-selectiondag-info"
@@ -30,9 +35,90 @@ static cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
"Allow (may be subject to certain conditions) "
"conversion of memcpy to TP loop.")));
+ARMSelectionDAGInfo::ARMSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(ARMGenSDNodeInfo) {}
+
+const char *ARMSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V) \
+ case V: \
+ return #V;
+
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<ARMISD::NodeType>(Opcode)) {
+ MAKE_CASE(ARMISD::DYN_ALLOC)
+ MAKE_CASE(ARMISD::MVESEXT)
+ MAKE_CASE(ARMISD::MVEZEXT)
+ MAKE_CASE(ARMISD::MVETRUNC)
+ MAKE_CASE(ARMISD::UMAAL)
+ MAKE_CASE(ARMISD::UMLAL)
+ MAKE_CASE(ARMISD::SMLAL)
+ MAKE_CASE(ARMISD::BUILD_VECTOR)
+ MAKE_CASE(ARMISD::VLD1DUP)
+ MAKE_CASE(ARMISD::VLD2DUP)
+ MAKE_CASE(ARMISD::VLD3DUP)
+ MAKE_CASE(ARMISD::VLD4DUP)
+ MAKE_CASE(ARMISD::VLD1_UPD)
+ MAKE_CASE(ARMISD::VLD2_UPD)
+ MAKE_CASE(ARMISD::VLD3_UPD)
+ MAKE_CASE(ARMISD::VLD4_UPD)
+ MAKE_CASE(ARMISD::VLD1x2_UPD)
+ MAKE_CASE(ARMISD::VLD1x3_UPD)
+ MAKE_CASE(ARMISD::VLD1x4_UPD)
+ MAKE_CASE(ARMISD::VLD2LN_UPD)
+ MAKE_CASE(ARMISD::VLD3LN_UPD)
+ MAKE_CASE(ARMISD::VLD4LN_UPD)
+ MAKE_CASE(ARMISD::VLD1DUP_UPD)
+ MAKE_CASE(ARMISD::VLD2DUP_UPD)
+ MAKE_CASE(ARMISD::VLD3DUP_UPD)
+ MAKE_CASE(ARMISD::VLD4DUP_UPD)
+ MAKE_CASE(ARMISD::VST1_UPD)
+ MAKE_CASE(ARMISD::VST3_UPD)
+ MAKE_CASE(ARMISD::VST1x2_UPD)
+ MAKE_CASE(ARMISD::VST1x3_UPD)
+ MAKE_CASE(ARMISD::VST1x4_UPD)
+ MAKE_CASE(ARMISD::VST2LN_UPD)
+ MAKE_CASE(ARMISD::VST3LN_UPD)
+ MAKE_CASE(ARMISD::VST4LN_UPD)
+ MAKE_CASE(ARMISD::WLS)
+ MAKE_CASE(ARMISD::WLSSETUP)
+ MAKE_CASE(ARMISD::LE)
+ MAKE_CASE(ARMISD::LOOP_DEC)
+ }
+#undef MAKE_CASE
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
- Opcode <= ARMISD::LAST_MEMORY_OPCODE;
+ // These nodes don't have corresponding entries in *.td files yet.
+ if (Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
+ Opcode <= ARMISD::LAST_MEMORY_OPCODE)
+ return true;
+
+ return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
+}
+
+void ARMSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ARMISD::WIN__DBZCHK:
+ // invalid number of results; expected 2, got 1
+ case ARMISD::WIN__CHKSTK:
+ // invalid number of results; expected 1, got 2
+ case ARMISD::COPY_STRUCT_BYVAL:
+ // invalid number of operands; expected 6, got 5
+ case ARMISD::MEMCPY:
+ // invalid number of operands; expected 5, got 4
+ case ARMISD::VMOVRRD:
+ // operand #0 must have type f64, but has type v1i64/v4f16/v8i8
+ case ARMISD::VMOVIMM:
+ // operand #0 must have type i32, but has type i16
+ return;
+ }
+
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
// Emit, if possible, a specialized version of the given Libcall. Typically this
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index d68150e66567c..574e0e517332f 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -17,7 +17,71 @@
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "ARMGenSDNodeInfo.inc"
+
namespace llvm {
+namespace ARMISD {
+
+enum NodeType : unsigned {
+ DYN_ALLOC = GENERATED_OPCODE_END, // Dynamic allocation on the stack.
+
+ WLS, // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
+ WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
+ LOOP_DEC, // Really a part of LE, performs the sub
+ LE, // Low-overhead loops, Loop End
+
+ MVESEXT, // Legalization aids for extending a vector into two/four vectors.
+ MVEZEXT, // or truncating two/four vectors into one. Eventually becomes
+ MVETRUNC, // stack store/load sequence, if not optimized to anything else.
+
+ UMLAL, // 64bit Unsigned Accumulate Multiply
+ SMLAL, // 64bit Signed Accumulate Multiply
+ UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
+
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
+ // Vector load N-element structure to all lanes:
+ FIRST_MEMORY_OPCODE,
+ VLD1DUP = FIRST_MEMORY_OPCODE,
+ VLD2DUP,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD1DUP_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+ VLD1x2_UPD,
+ VLD1x3_UPD,
+ VLD1x4_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST3_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD,
+ VST1x2_UPD,
+ VST1x3_UPD,
+ VST1x4_UPD,
+ LAST_MEMORY_OPCODE = VST1x4_UPD,
+};
+
+} // namespace ARMISD
namespace ARM_AM {
static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
@@ -35,10 +99,17 @@ namespace ARM_AM {
}
} // end namespace ARM_AM
-class ARMSelectionDAGInfo : public SelectionDAGTargetInfo {
+class ARMSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ ARMSelectionDAGInfo();
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
bool isTargetMemoryOpcode(unsigned Opcode) const override;
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
+
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment,
@@ -66,6 +137,6 @@ class ARMSelectionDAGInfo : public SelectionDAGTargetInfo {
RTLIB::Libcall LC) const;
};
-}
+} // namespace llvm
#endif
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index fa778cad4af8e..eb3ad01a54fb2 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank)
tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM ARMGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables)
diff --git a/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp b/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp
index ca9afded0c0c4..c763da95fa455 100644
--- a/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp
+++ b/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp
@@ -5,7 +5,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/AsmParser/Parser.h"
>From f21a450ec69efa52f5e933ea6227fba366106c72 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:01:21 +0300
Subject: [PATCH 2/9] Hexagon
---
llvm/lib/Target/Hexagon/CMakeLists.txt | 1 +
.../Target/Hexagon/HexagonISelLowering.cpp | 62 ------------
llvm/lib/Target/Hexagon/HexagonISelLowering.h | 97 -------------------
.../Hexagon/HexagonSelectionDAGInfo.cpp | 61 ++++++++++++
.../Target/Hexagon/HexagonSelectionDAGInfo.h | 63 +++++++++++-
5 files changed, 122 insertions(+), 162 deletions(-)
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index 1a5f09642ea66..85ec5c7c2d45e 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -11,6 +11,7 @@ tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM HexagonGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(HexagonCommonTableGen)
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 894a07e6b68c2..87fbcfec600e7 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1914,66 +1914,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
computeRegisterProperties(&HRI);
}
-const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((HexagonISD::NodeType)Opcode) {
- case HexagonISD::ADDC: return "HexagonISD::ADDC";
- case HexagonISD::SUBC: return "HexagonISD::SUBC";
- case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
- case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
- case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
- case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
- case HexagonISD::CALL: return "HexagonISD::CALL";
- case HexagonISD::CALLnr: return "HexagonISD::CALLnr";
- case HexagonISD::CALLR: return "HexagonISD::CALLR";
- case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
- case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
- case HexagonISD::CONST32: return "HexagonISD::CONST32";
- case HexagonISD::CP: return "HexagonISD::CP";
- case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
- case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
- case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT";
- case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
- case HexagonISD::INSERT: return "HexagonISD::INSERT";
- case HexagonISD::JT: return "HexagonISD::JT";
- case HexagonISD::RET_GLUE: return "HexagonISD::RET_GLUE";
- case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
- case HexagonISD::VASL: return "HexagonISD::VASL";
- case HexagonISD::VASR: return "HexagonISD::VASR";
- case HexagonISD::VLSR: return "HexagonISD::VLSR";
- case HexagonISD::MFSHL: return "HexagonISD::MFSHL";
- case HexagonISD::MFSHR: return "HexagonISD::MFSHR";
- case HexagonISD::SSAT: return "HexagonISD::SSAT";
- case HexagonISD::USAT: return "HexagonISD::USAT";
- case HexagonISD::SMUL_LOHI: return "HexagonISD::SMUL_LOHI";
- case HexagonISD::UMUL_LOHI: return "HexagonISD::UMUL_LOHI";
- case HexagonISD::USMUL_LOHI: return "HexagonISD::USMUL_LOHI";
- case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW";
- case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
- case HexagonISD::VROR: return "HexagonISD::VROR";
- case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
- case HexagonISD::READTIMER: return "HexagonISD::READTIMER";
- case HexagonISD::THREAD_POINTER:
- return "HexagonISD::THREAD_POINTER";
- case HexagonISD::PTRUE: return "HexagonISD::PTRUE";
- case HexagonISD::PFALSE: return "HexagonISD::PFALSE";
- case HexagonISD::D2P: return "HexagonISD::D2P";
- case HexagonISD::P2D: return "HexagonISD::P2D";
- case HexagonISD::V2Q: return "HexagonISD::V2Q";
- case HexagonISD::Q2V: return "HexagonISD::Q2V";
- case HexagonISD::QCAT: return "HexagonISD::QCAT";
- case HexagonISD::QTRUE: return "HexagonISD::QTRUE";
- case HexagonISD::QFALSE: return "HexagonISD::QFALSE";
- case HexagonISD::TL_EXTEND: return "HexagonISD::TL_EXTEND";
- case HexagonISD::TL_TRUNCATE: return "HexagonISD::TL_TRUNCATE";
- case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
- case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
- case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
- case HexagonISD::ISEL: return "HexagonISD::ISEL";
- case HexagonISD::OP_END: break;
- }
- return nullptr;
-}
-
bool
HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
const SDLoc &dl, SelectionDAG &DAG) const {
@@ -3368,8 +3308,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
default:
#ifndef NDEBUG
Op.getNode()->dumpr(&DAG);
- if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
- errs() << "Error: check for a non-legal type in this operation\n";
#endif
llvm_unreachable("Should not custom lower this!");
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index f4d2a79051c10..580cb48e37465 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -29,102 +29,6 @@
namespace llvm {
-namespace HexagonISD {
-
-// clang-format off
-enum NodeType : unsigned {
- OP_BEGIN = ISD::BUILTIN_OP_END,
-
- CONST32 = OP_BEGIN,
- CONST32_GP, // For marking data present in GP.
- ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout).
- SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout).
- ALLOCA,
-
- AT_GOT, // Index in GOT.
- AT_PCREL, // Offset relative to PC.
-
- CALL, // Function call.
- CALLnr, // Function call that does not return.
- CALLR,
-
- RET_GLUE, // Return with a glue operand.
- BARRIER, // Memory barrier.
- JT, // Jump table.
- CP, // Constant pool.
-
- COMBINE,
- VASL, // Vector shifts by a scalar value
- VASR,
- VLSR,
- MFSHL, // Funnel shifts with the shift amount guaranteed to be
- MFSHR, // within the range of the bit width of the element.
-
- SSAT, // Signed saturate.
- USAT, // Unsigned saturate.
- SMUL_LOHI, // Same as ISD::SMUL_LOHI, but opaque to the combiner.
- UMUL_LOHI, // Same as ISD::UMUL_LOHI, but opaque to the combiner.
- // We want to legalize MULH[SU] to [SU]MUL_LOHI, but the
- // combiner will keep rewriting it back to MULH[SU].
- USMUL_LOHI, // Like SMUL_LOHI, but unsigned*signed.
-
- TSTBIT,
- INSERT,
- EXTRACTU,
- VEXTRACTW,
- VINSERTW0,
- VROR,
- TC_RETURN,
- EH_RETURN,
- DCFETCH,
- READCYCLE,
- READTIMER,
- THREAD_POINTER,
- PTRUE,
- PFALSE,
- D2P, // Convert 8-byte value to 8-bit predicate register. [*]
- P2D, // Convert 8-bit predicate register to 8-byte value. [*]
- V2Q, // Convert HVX vector to a vector predicate reg. [*]
- Q2V, // Convert vector predicate to an HVX vector. [*]
- // [*] The equivalence is defined as "Q <=> (V != 0)",
- // where the != operation compares bytes.
- // Note: V != 0 is implemented as V >u 0.
- QCAT,
- QTRUE,
- QFALSE,
-
- TL_EXTEND, // Wrappers for ISD::*_EXTEND and ISD::TRUNCATE to prevent DAG
- TL_TRUNCATE, // from auto-folding operations, e.g.
- // (i32 ext (i16 ext i8)) would be folded to (i32 ext i8).
- // To simplify the type legalization, we want to keep these
- // single steps separate during type legalization.
- // TL_[EXTEND|TRUNCATE] Inp, i128 _, i32 Opc
- // * Inp is the original input to extend/truncate,
- // * _ is a dummy operand with an illegal type (can be undef),
- // * Opc is the original opcode.
- // The legalization process (in Hexagon lowering code) will
- // first deal with the "real" types (i.e. Inp and the result),
- // and once all of them are processed, the wrapper node will
- // be replaced with the original ISD node. The dummy illegal
- // operand is there to make sure that the legalization hooks
- // are called again after everything else is legal, giving
- // us the opportunity to undo the wrapping.
-
- TYPECAST, // No-op that's used to convert between different legal
- // types in a register.
- VALIGN, // Align two vectors (in Op0, Op1) to one that would have
- // been loaded from address in Op2.
- VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is
- // an address in a vector load, then it's a no-op.
- ISEL, // Marker for nodes that were created during ISel, and
- // which need explicit selection (would have been left
- // unselected otherwise).
- OP_END
-};
-
-} // end namespace HexagonISD
-// clang-format on
-
class HexagonSubtarget;
class HexagonTargetLowering : public TargetLowering {
@@ -189,7 +93,6 @@ class HexagonTargetLowering : public TargetLowering {
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
- const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(const HexagonSubtarget &Subtarget,
EVT VT) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 33aa6e4a26145..f601f0b2ec020 100644
--- a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -10,12 +10,73 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonSelectionDAGInfo.h"
#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
+
+#define GET_SDNODE_DESC
+#include "HexagonGenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "hexagon-selectiondag-info"
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(HexagonGenSDNodeInfo) {}
+
+const char *HexagonSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<HexagonISD::NodeType>(Opcode)) {
+ case HexagonISD::ADDC:
+ return "HexagonISD::ADDC";
+ case HexagonISD::SUBC:
+ return "HexagonISD::SUBC";
+ case HexagonISD::CALLR:
+ return "HexagonISD::CALLR";
+ case HexagonISD::SMUL_LOHI:
+ return "HexagonISD::SMUL_LOHI";
+ case HexagonISD::UMUL_LOHI:
+ return "HexagonISD::UMUL_LOHI";
+ case HexagonISD::USMUL_LOHI:
+ return "HexagonISD::USMUL_LOHI";
+ case HexagonISD::VROR:
+ return "HexagonISD::VROR";
+ case HexagonISD::D2P:
+ return "HexagonISD::D2P";
+ case HexagonISD::P2D:
+ return "HexagonISD::P2D";
+ case HexagonISD::V2Q:
+ return "HexagonISD::V2Q";
+ case HexagonISD::Q2V:
+ return "HexagonISD::Q2V";
+ case HexagonISD::TL_EXTEND:
+ return "HexagonISD::TL_EXTEND";
+ case HexagonISD::TL_TRUNCATE:
+ return "HexagonISD::TL_TRUNCATE";
+ case HexagonISD::TYPECAST:
+ return "HexagonISD::TYPECAST";
+ case HexagonISD::ISEL:
+ return "HexagonISD::ISEL";
+ }
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
+void HexagonSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case HexagonISD::VALIGNADDR:
+ // invalid number of operands; expected 1, got 2
+ case HexagonISD::VINSERTW0:
+ // operand #1 must have type i32, but has type v4i8/v2i16
+ return;
+ }
+
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+}
+
SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
diff --git a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index 0d3b1725d1bc4..c033f3421e5e5 100644
--- a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -15,11 +15,68 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "HexagonGenSDNodeInfo.inc"
+
namespace llvm {
+namespace HexagonISD {
+
+enum NodeType : unsigned {
+ ADDC = GENERATED_OPCODE_END, // Add with carry: (X, Y, Cin) -> (X+Y, Cout).
+ SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout).
+
+ CALLR,
+
+ SMUL_LOHI, // Same as ISD::SMUL_LOHI, but opaque to the combiner.
+ UMUL_LOHI, // Same as ISD::UMUL_LOHI, but opaque to the combiner.
+ // We want to legalize MULH[SU] to [SU]MUL_LOHI, but the
+ // combiner will keep rewriting it back to MULH[SU].
+ USMUL_LOHI, // Like SMUL_LOHI, but unsigned*signed.
+
+ VROR,
+ D2P, // Convert 8-byte value to 8-bit predicate register. [*]
+ P2D, // Convert 8-bit predicate register to 8-byte value. [*]
+ V2Q, // Convert HVX vector to a vector predicate reg. [*]
+ Q2V, // Convert vector predicate to an HVX vector. [*]
+ // [*] The equivalence is defined as "Q <=> (V != 0)",
+ // where the != operation compares bytes.
+ // Note: V != 0 is implemented as V >u 0.
+
+ TL_EXTEND, // Wrappers for ISD::*_EXTEND and ISD::TRUNCATE to prevent DAG
+ TL_TRUNCATE, // from auto-folding operations, e.g.
+ // (i32 ext (i16 ext i8)) would be folded to (i32 ext i8).
+ // To simplify the type legalization, we want to keep these
+ // single steps separate during type legalization.
+ // TL_[EXTEND|TRUNCATE] Inp, i128 _, i32 Opc
+ // * Inp is the original input to extend/truncate,
+ // * _ is a dummy operand with an illegal type (can be undef),
+ // * Opc is the original opcode.
+ // The legalization process (in Hexagon lowering code) will
+ // first deal with the "real" types (i.e. Inp and the result),
+ // and once all of them are processed, the wrapper node will
+ // be replaced with the original ISD node. The dummy illegal
+ // operand is there to make sure that the legalization hooks
+ // are called again after everything else is legal, giving
+ // us the opportunity to undo the wrapping.
-class HexagonSelectionDAGInfo : public SelectionDAGTargetInfo {
+ TYPECAST, // No-op that's used to convert between different legal
+ // types in a register.
+ ISEL, // Marker for nodes that were created during ISel, and
+ // which need explicit selection (would have been left
+ // unselected otherwise).
+ // clang-format on
+};
+
+} // namespace HexagonISD
+
+class HexagonSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
- explicit HexagonSelectionDAGInfo() = default;
+ HexagonSelectionDAGInfo();
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
@@ -29,6 +86,6 @@ class HexagonSelectionDAGInfo : public SelectionDAGTargetInfo {
MachinePointerInfo SrcPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
>From c14b6fc90ec6fe090de2b5cc0f6e7b3e7876fcf1 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 03:11:17 +0300
Subject: [PATCH 3/9] LoongArch
---
llvm/lib/Target/LoongArch/CMakeLists.txt | 2 +
.../Target/LoongArch/LoongArchISelDAGToDAG.h | 1 +
.../LoongArch/LoongArchISelLowering.cpp | 122 +-----------
.../Target/LoongArch/LoongArchISelLowering.h | 176 ------------------
.../LoongArch/LoongArchSelectionDAGInfo.cpp | 38 ++++
.../LoongArch/LoongArchSelectionDAGInfo.h | 41 ++++
.../Target/LoongArch/LoongArchSubtarget.cpp | 11 +-
.../lib/Target/LoongArch/LoongArchSubtarget.h | 10 +-
8 files changed, 100 insertions(+), 301 deletions(-)
create mode 100644 llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp
create mode 100644 llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h
diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt
index 0f674b1b0fa9e..8689d09140a1e 100644
--- a/llvm/lib/Target/LoongArch/CMakeLists.txt
+++ b/llvm/lib/Target/LoongArch/CMakeLists.txt
@@ -10,6 +10,7 @@ tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM LoongArchGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(LoongArchCommonTableGen)
@@ -27,6 +28,7 @@ add_llvm_target(LoongArchCodeGen
LoongArchMergeBaseOffset.cpp
LoongArchOptWInstrs.cpp
LoongArchRegisterInfo.cpp
+ LoongArchSelectionDAGInfo.cpp
LoongArchSubtarget.cpp
LoongArchTargetMachine.cpp
LoongArchTargetTransformInfo.cpp
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 1eed877dcef16..4c8dcb8fa48af 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H
#include "LoongArch.h"
+#include "LoongArchSelectionDAGInfo.h"
#include "LoongArchTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index cf4ffc82f6009..f7cc93153267a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -15,6 +15,7 @@
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
+#include "LoongArchSelectionDAGInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
@@ -4459,7 +4460,7 @@ SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
-static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
+static unsigned getLoongArchWOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Unexpected opcode");
@@ -4495,7 +4496,7 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
unsigned ExtOpc = ISD::ANY_EXTEND) {
SDLoc DL(N);
- LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
+ unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
SDValue NewOp0, NewRes;
switch (NumOp) {
@@ -7483,123 +7484,6 @@ bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
return true;
}
-const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((LoongArchISD::NodeType)Opcode) {
- case LoongArchISD::FIRST_NUMBER:
- break;
-
-#define NODE_NAME_CASE(node) \
- case LoongArchISD::node: \
- return "LoongArchISD::" #node;
-
- // TODO: Add more target-dependent nodes later.
- NODE_NAME_CASE(CALL)
- NODE_NAME_CASE(CALL_MEDIUM)
- NODE_NAME_CASE(CALL_LARGE)
- NODE_NAME_CASE(RET)
- NODE_NAME_CASE(TAIL)
- NODE_NAME_CASE(TAIL_MEDIUM)
- NODE_NAME_CASE(TAIL_LARGE)
- NODE_NAME_CASE(SELECT_CC)
- NODE_NAME_CASE(BR_CC)
- NODE_NAME_CASE(BRCOND)
- NODE_NAME_CASE(SLL_W)
- NODE_NAME_CASE(SRA_W)
- NODE_NAME_CASE(SRL_W)
- NODE_NAME_CASE(BSTRINS)
- NODE_NAME_CASE(BSTRPICK)
- NODE_NAME_CASE(MOVGR2FR_W)
- NODE_NAME_CASE(MOVGR2FR_W_LA64)
- NODE_NAME_CASE(MOVGR2FR_D)
- NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
- NODE_NAME_CASE(MOVFR2GR_S_LA64)
- NODE_NAME_CASE(FTINT)
- NODE_NAME_CASE(BUILD_PAIR_F64)
- NODE_NAME_CASE(SPLIT_PAIR_F64)
- NODE_NAME_CASE(REVB_2H)
- NODE_NAME_CASE(REVB_2W)
- NODE_NAME_CASE(BITREV_4B)
- NODE_NAME_CASE(BITREV_8B)
- NODE_NAME_CASE(BITREV_W)
- NODE_NAME_CASE(ROTR_W)
- NODE_NAME_CASE(ROTL_W)
- NODE_NAME_CASE(DIV_W)
- NODE_NAME_CASE(DIV_WU)
- NODE_NAME_CASE(MOD_W)
- NODE_NAME_CASE(MOD_WU)
- NODE_NAME_CASE(CLZ_W)
- NODE_NAME_CASE(CTZ_W)
- NODE_NAME_CASE(DBAR)
- NODE_NAME_CASE(IBAR)
- NODE_NAME_CASE(BREAK)
- NODE_NAME_CASE(SYSCALL)
- NODE_NAME_CASE(CRC_W_B_W)
- NODE_NAME_CASE(CRC_W_H_W)
- NODE_NAME_CASE(CRC_W_W_W)
- NODE_NAME_CASE(CRC_W_D_W)
- NODE_NAME_CASE(CRCC_W_B_W)
- NODE_NAME_CASE(CRCC_W_H_W)
- NODE_NAME_CASE(CRCC_W_W_W)
- NODE_NAME_CASE(CRCC_W_D_W)
- NODE_NAME_CASE(CSRRD)
- NODE_NAME_CASE(CSRWR)
- NODE_NAME_CASE(CSRXCHG)
- NODE_NAME_CASE(IOCSRRD_B)
- NODE_NAME_CASE(IOCSRRD_H)
- NODE_NAME_CASE(IOCSRRD_W)
- NODE_NAME_CASE(IOCSRRD_D)
- NODE_NAME_CASE(IOCSRWR_B)
- NODE_NAME_CASE(IOCSRWR_H)
- NODE_NAME_CASE(IOCSRWR_W)
- NODE_NAME_CASE(IOCSRWR_D)
- NODE_NAME_CASE(CPUCFG)
- NODE_NAME_CASE(MOVGR2FCSR)
- NODE_NAME_CASE(MOVFCSR2GR)
- NODE_NAME_CASE(CACOP_D)
- NODE_NAME_CASE(CACOP_W)
- NODE_NAME_CASE(VSHUF)
- NODE_NAME_CASE(VPICKEV)
- NODE_NAME_CASE(VPICKOD)
- NODE_NAME_CASE(VPACKEV)
- NODE_NAME_CASE(VPACKOD)
- NODE_NAME_CASE(VILVL)
- NODE_NAME_CASE(VILVH)
- NODE_NAME_CASE(VSHUF4I)
- NODE_NAME_CASE(VREPLVEI)
- NODE_NAME_CASE(VREPLGR2VR)
- NODE_NAME_CASE(XVPERMI)
- NODE_NAME_CASE(XVPERM)
- NODE_NAME_CASE(XVREPLVE0)
- NODE_NAME_CASE(XVREPLVE0Q)
- NODE_NAME_CASE(XVINSVE0)
- NODE_NAME_CASE(VPICK_SEXT_ELT)
- NODE_NAME_CASE(VPICK_ZEXT_ELT)
- NODE_NAME_CASE(VREPLVE)
- NODE_NAME_CASE(VALL_ZERO)
- NODE_NAME_CASE(VANY_ZERO)
- NODE_NAME_CASE(VALL_NONZERO)
- NODE_NAME_CASE(VANY_NONZERO)
- NODE_NAME_CASE(FRECIPE)
- NODE_NAME_CASE(FRSQRTE)
- NODE_NAME_CASE(VSLLI)
- NODE_NAME_CASE(VSRLI)
- NODE_NAME_CASE(VBSLL)
- NODE_NAME_CASE(VBSRL)
- NODE_NAME_CASE(VLDREPL)
- NODE_NAME_CASE(VMSKLTZ)
- NODE_NAME_CASE(VMSKGEZ)
- NODE_NAME_CASE(VMSKEQZ)
- NODE_NAME_CASE(VMSKNEZ)
- NODE_NAME_CASE(XVMSKLTZ)
- NODE_NAME_CASE(XVMSKGEZ)
- NODE_NAME_CASE(XVMSKEQZ)
- NODE_NAME_CASE(XVMSKNEZ)
- NODE_NAME_CASE(VHADDW)
- }
-#undef NODE_NAME_CASE
- return nullptr;
-}
-
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d7748467c7..232ac6092149d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -21,179 +21,6 @@
namespace llvm {
class LoongArchSubtarget;
-namespace LoongArchISD {
-enum NodeType : unsigned {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // TODO: add more LoongArchISDs
- CALL,
- CALL_MEDIUM,
- CALL_LARGE,
- RET,
- TAIL,
- TAIL_MEDIUM,
- TAIL_LARGE,
-
- // Select
- SELECT_CC,
-
- // Branch
- BR_CC,
- BRCOND,
-
- // 32-bit shifts, directly matching the semantics of the named LoongArch
- // instructions.
- SLL_W,
- SRA_W,
- SRL_W,
-
- ROTL_W,
- ROTR_W,
-
- // unsigned 32-bit integer division
- DIV_W,
- MOD_W,
- DIV_WU,
- MOD_WU,
-
- // FPR<->GPR transfer operations
- MOVGR2FR_W,
- MOVGR2FR_W_LA64,
- MOVGR2FR_D,
- MOVGR2FR_D_LO_HI,
- MOVFR2GR_S_LA64,
- MOVFCSR2GR,
- MOVGR2FCSR,
-
- FTINT,
-
- // Build and split F64 pair
- BUILD_PAIR_F64,
- SPLIT_PAIR_F64,
-
- // Bit counting operations
- CLZ_W,
- CTZ_W,
-
- BSTRINS,
- BSTRPICK,
-
- // Byte-swapping and bit-reversal
- REVB_2H,
- REVB_2W,
- BITREV_4B,
- BITREV_8B,
- BITREV_W,
-
- // Intrinsic operations start ============================================
- BREAK,
- CACOP_D,
- CACOP_W,
- DBAR,
- IBAR,
- SYSCALL,
-
- // CRC check operations
- CRC_W_B_W,
- CRC_W_H_W,
- CRC_W_W_W,
- CRC_W_D_W,
- CRCC_W_B_W,
- CRCC_W_H_W,
- CRCC_W_W_W,
- CRCC_W_D_W,
-
- CSRRD,
-
- // Write new value to CSR and return old value.
- // Operand 0: A chain pointer.
- // Operand 1: The new value to write.
- // Operand 2: The address of the required CSR.
- // Result 0: The old value of the CSR.
- // Result 1: The new chain pointer.
- CSRWR,
-
- // Similar to CSRWR but with a write mask.
- // Operand 0: A chain pointer.
- // Operand 1: The new value to write.
- // Operand 2: The write mask.
- // Operand 3: The address of the required CSR.
- // Result 0: The old value of the CSR.
- // Result 1: The new chain pointer.
- CSRXCHG,
-
- // IOCSR access operations
- IOCSRRD_B,
- IOCSRRD_W,
- IOCSRRD_H,
- IOCSRRD_D,
- IOCSRWR_B,
- IOCSRWR_H,
- IOCSRWR_W,
- IOCSRWR_D,
-
- // Read CPU configuration information operation
- CPUCFG,
-
- // Vector Shuffle
- VREPLVE,
- VSHUF,
- VPICKEV,
- VPICKOD,
- VPACKEV,
- VPACKOD,
- VILVL,
- VILVH,
- VSHUF4I,
- VREPLVEI,
- VREPLGR2VR,
- XVPERMI,
- XVPERM,
- XVREPLVE0,
- XVREPLVE0Q,
- XVINSVE0,
-
- // Extended vector element extraction
- VPICK_SEXT_ELT,
- VPICK_ZEXT_ELT,
-
- // Vector comparisons
- VALL_ZERO,
- VANY_ZERO,
- VALL_NONZERO,
- VANY_NONZERO,
-
- // Floating point approximate reciprocal operation
- FRECIPE,
- FRSQRTE,
-
- // Vector logicial left / right shift by immediate
- VSLLI,
- VSRLI,
-
- // Vector byte logicial left / right shift
- VBSLL,
- VBSRL,
-
- // Scalar load broadcast to vector
- VLDREPL,
-
- // Vector mask set by condition
- VMSKLTZ,
- VMSKGEZ,
- VMSKEQZ,
- VMSKNEZ,
- XVMSKLTZ,
- XVMSKGEZ,
- XVMSKEQZ,
- XVMSKNEZ,
-
- // Vector Horizontal Addition with Widening‌
- VHADDW
-
- // Intrinsic operations end =============================================
-};
-} // end namespace LoongArchISD
class LoongArchTargetLowering : public TargetLowering {
const LoongArchSubtarget &Subtarget;
@@ -213,9 +40,6 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- // This method returns the name of a target specific DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
// Lower incoming arguments, copy physregs into vregs.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp
new file mode 100644
index 0000000000000..19557954867d6
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp
@@ -0,0 +1,38 @@
+//===- LoongArchSelectionDAGInfo.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchSelectionDAGInfo.h"
+
+#define GET_SDNODE_DESC
+#include "LoongArchGenSDNodeInfo.inc"
+
+using namespace llvm;
+
+LoongArchSelectionDAGInfo::LoongArchSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(LoongArchGenSDNodeInfo) {}
+
+LoongArchSelectionDAGInfo::~LoongArchSelectionDAGInfo() = default;
+
+const char *
+LoongArchSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+ switch (static_cast<LoongArchISD::NodeType>(Opcode)) {
+ case LoongArchISD::VSHUF4I:
+ return "LoongArchISD::VSHUF4I";
+ }
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
+void LoongArchSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ if (N->getOpcode() == LoongArchISD::VLDREPL) {
+ // invalid number of operands; expected 2, got 3
+ return;
+ }
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h
new file mode 100644
index 0000000000000..c53304df4650b
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h
@@ -0,0 +1,41 @@
+//===- LoongArchSelectionDAGInfo.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+#define GET_SDNODE_ENUM
+#include "LoongArchGenSDNodeInfo.inc"
+
+namespace llvm {
+namespace LoongArchISD {
+
+enum NodeType {
+ // This is skipped by TableGen because it has conflicting SDTypeProfiles.
+ VSHUF4I = GENERATED_OPCODE_END,
+};
+
+} // namespace LoongArchISD
+
+class LoongArchSelectionDAGInfo : public SelectionDAGGenTargetInfo {
+public:
+ LoongArchSelectionDAGInfo();
+
+ ~LoongArchSelectionDAGInfo() override;
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
index 76a8ba1c90e50..6293cbef23656 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -12,6 +12,7 @@
#include "LoongArchSubtarget.h"
#include "LoongArchFrameLowering.h"
+#include "LoongArchSelectionDAGInfo.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
using namespace llvm;
@@ -95,4 +96,12 @@ LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU,
: LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS),
FrameLowering(
initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
- InstrInfo(*this), TLInfo(TM, *this) {}
+ InstrInfo(*this), TLInfo(TM, *this) {
+ TSInfo = std::make_unique<LoongArchSelectionDAGInfo>();
+}
+
+LoongArchSubtarget::~LoongArchSubtarget() = default;
+
+const SelectionDAGTargetInfo *LoongArchSubtarget::getSelectionDAGInfo() const {
+ return TSInfo.get();
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 2beff07949daf..b90542cc04c32 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -18,7 +18,6 @@
#include "LoongArchInstrInfo.h"
#include "LoongArchRegisterInfo.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -46,7 +45,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
LoongArchFrameLowering FrameLowering;
LoongArchInstrInfo InstrInfo;
LoongArchTargetLowering TLInfo;
- SelectionDAGTargetInfo TSInfo;
+ std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
Align PrefFunctionAlignment;
Align PrefLoopAlignment;
@@ -68,6 +67,8 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
StringRef FS, StringRef ABIName, const TargetMachine &TM);
+ ~LoongArchSubtarget() override;
+
// Parses features string setting specified subtarget options. The
// definition of this function is auto-generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
@@ -82,9 +83,8 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
const LoongArchTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
+
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
bool GETTER() const { return ATTRIBUTE; }
>From f642f722ddaa690a5d681afc2506acc17d768703 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:02:01 +0300
Subject: [PATCH 4/9] Mips
---
llvm/lib/Target/Mips/CMakeLists.txt | 1 +
llvm/lib/Target/Mips/MipsISelLowering.cpp | 122 ----------
llvm/lib/Target/Mips/MipsISelLowering.h | 216 +-----------------
llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp | 57 ++++-
llvm/lib/Target/Mips/MipsSelectionDAGInfo.h | 26 ++-
5 files changed, 79 insertions(+), 343 deletions(-)
diff --git a/llvm/lib/Target/Mips/CMakeLists.txt b/llvm/lib/Target/Mips/CMakeLists.txt
index 4a2277e9a80dc..726f0af0d8b0b 100644
--- a/llvm/lib/Target/Mips/CMakeLists.txt
+++ b/llvm/lib/Target/Mips/CMakeLists.txt
@@ -17,6 +17,7 @@ tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM MipsGenRegisterBank.inc -gen-register-bank)
tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM MipsGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MipsGenExegesis.inc -gen-exegesis)
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 2fd73275721b1..a0bca0448655f 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -171,128 +171,6 @@ SDValue MipsTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
N->getOffset(), Flag);
}
-const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((MipsISD::NodeType)Opcode) {
- case MipsISD::FIRST_NUMBER: break;
- case MipsISD::JmpLink: return "MipsISD::JmpLink";
- case MipsISD::TailCall: return "MipsISD::TailCall";
- case MipsISD::Highest: return "MipsISD::Highest";
- case MipsISD::Higher: return "MipsISD::Higher";
- case MipsISD::Hi: return "MipsISD::Hi";
- case MipsISD::Lo: return "MipsISD::Lo";
- case MipsISD::GotHi: return "MipsISD::GotHi";
- case MipsISD::TlsHi: return "MipsISD::TlsHi";
- case MipsISD::GPRel: return "MipsISD::GPRel";
- case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
- case MipsISD::Ret: return "MipsISD::Ret";
- case MipsISD::ERet: return "MipsISD::ERet";
- case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN";
- case MipsISD::FAbs: return "MipsISD::FAbs";
- case MipsISD::FMS: return "MipsISD::FMS";
- case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
- case MipsISD::FPCmp: return "MipsISD::FPCmp";
- case MipsISD::FSELECT: return "MipsISD::FSELECT";
- case MipsISD::MTC1_D64: return "MipsISD::MTC1_D64";
- case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
- case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
- case MipsISD::TruncIntFP: return "MipsISD::TruncIntFP";
- case MipsISD::MFHI: return "MipsISD::MFHI";
- case MipsISD::MFLO: return "MipsISD::MFLO";
- case MipsISD::MTLOHI: return "MipsISD::MTLOHI";
- case MipsISD::Mult: return "MipsISD::Mult";
- case MipsISD::Multu: return "MipsISD::Multu";
- case MipsISD::MAdd: return "MipsISD::MAdd";
- case MipsISD::MAddu: return "MipsISD::MAddu";
- case MipsISD::MSub: return "MipsISD::MSub";
- case MipsISD::MSubu: return "MipsISD::MSubu";
- case MipsISD::DivRem: return "MipsISD::DivRem";
- case MipsISD::DivRemU: return "MipsISD::DivRemU";
- case MipsISD::DivRem16: return "MipsISD::DivRem16";
- case MipsISD::DivRemU16: return "MipsISD::DivRemU16";
- case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
- case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
- case MipsISD::Wrapper: return "MipsISD::Wrapper";
- case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
- case MipsISD::Sync: return "MipsISD::Sync";
- case MipsISD::Ext: return "MipsISD::Ext";
- case MipsISD::Ins: return "MipsISD::Ins";
- case MipsISD::CIns: return "MipsISD::CIns";
- case MipsISD::LWL: return "MipsISD::LWL";
- case MipsISD::LWR: return "MipsISD::LWR";
- case MipsISD::SWL: return "MipsISD::SWL";
- case MipsISD::SWR: return "MipsISD::SWR";
- case MipsISD::LDL: return "MipsISD::LDL";
- case MipsISD::LDR: return "MipsISD::LDR";
- case MipsISD::SDL: return "MipsISD::SDL";
- case MipsISD::SDR: return "MipsISD::SDR";
- case MipsISD::EXTP: return "MipsISD::EXTP";
- case MipsISD::EXTPDP: return "MipsISD::EXTPDP";
- case MipsISD::EXTR_S_H: return "MipsISD::EXTR_S_H";
- case MipsISD::EXTR_W: return "MipsISD::EXTR_W";
- case MipsISD::EXTR_R_W: return "MipsISD::EXTR_R_W";
- case MipsISD::EXTR_RS_W: return "MipsISD::EXTR_RS_W";
- case MipsISD::SHILO: return "MipsISD::SHILO";
- case MipsISD::MTHLIP: return "MipsISD::MTHLIP";
- case MipsISD::MULSAQ_S_W_PH: return "MipsISD::MULSAQ_S_W_PH";
- case MipsISD::MAQ_S_W_PHL: return "MipsISD::MAQ_S_W_PHL";
- case MipsISD::MAQ_S_W_PHR: return "MipsISD::MAQ_S_W_PHR";
- case MipsISD::MAQ_SA_W_PHL: return "MipsISD::MAQ_SA_W_PHL";
- case MipsISD::MAQ_SA_W_PHR: return "MipsISD::MAQ_SA_W_PHR";
- case MipsISD::DOUBLE_SELECT_I: return "MipsISD::DOUBLE_SELECT_I";
- case MipsISD::DOUBLE_SELECT_I64: return "MipsISD::DOUBLE_SELECT_I64";
- case MipsISD::DPAU_H_QBL: return "MipsISD::DPAU_H_QBL";
- case MipsISD::DPAU_H_QBR: return "MipsISD::DPAU_H_QBR";
- case MipsISD::DPSU_H_QBL: return "MipsISD::DPSU_H_QBL";
- case MipsISD::DPSU_H_QBR: return "MipsISD::DPSU_H_QBR";
- case MipsISD::DPAQ_S_W_PH: return "MipsISD::DPAQ_S_W_PH";
- case MipsISD::DPSQ_S_W_PH: return "MipsISD::DPSQ_S_W_PH";
- case MipsISD::DPAQ_SA_L_W: return "MipsISD::DPAQ_SA_L_W";
- case MipsISD::DPSQ_SA_L_W: return "MipsISD::DPSQ_SA_L_W";
- case MipsISD::DPA_W_PH: return "MipsISD::DPA_W_PH";
- case MipsISD::DPS_W_PH: return "MipsISD::DPS_W_PH";
- case MipsISD::DPAQX_S_W_PH: return "MipsISD::DPAQX_S_W_PH";
- case MipsISD::DPAQX_SA_W_PH: return "MipsISD::DPAQX_SA_W_PH";
- case MipsISD::DPAX_W_PH: return "MipsISD::DPAX_W_PH";
- case MipsISD::DPSX_W_PH: return "MipsISD::DPSX_W_PH";
- case MipsISD::DPSQX_S_W_PH: return "MipsISD::DPSQX_S_W_PH";
- case MipsISD::DPSQX_SA_W_PH: return "MipsISD::DPSQX_SA_W_PH";
- case MipsISD::MULSA_W_PH: return "MipsISD::MULSA_W_PH";
- case MipsISD::MULT: return "MipsISD::MULT";
- case MipsISD::MULTU: return "MipsISD::MULTU";
- case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP";
- case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP";
- case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP";
- case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP";
- case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP";
- case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP";
- case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP";
- case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP";
- case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP";
- case MipsISD::VALL_ZERO: return "MipsISD::VALL_ZERO";
- case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO";
- case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO";
- case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO";
- case MipsISD::VCEQ: return "MipsISD::VCEQ";
- case MipsISD::VCLE_S: return "MipsISD::VCLE_S";
- case MipsISD::VCLE_U: return "MipsISD::VCLE_U";
- case MipsISD::VCLT_S: return "MipsISD::VCLT_S";
- case MipsISD::VCLT_U: return "MipsISD::VCLT_U";
- case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT";
- case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT";
- case MipsISD::VNOR: return "MipsISD::VNOR";
- case MipsISD::VSHF: return "MipsISD::VSHF";
- case MipsISD::SHF: return "MipsISD::SHF";
- case MipsISD::ILVEV: return "MipsISD::ILVEV";
- case MipsISD::ILVOD: return "MipsISD::ILVOD";
- case MipsISD::ILVL: return "MipsISD::ILVL";
- case MipsISD::ILVR: return "MipsISD::ILVR";
- case MipsISD::PCKEV: return "MipsISD::PCKEV";
- case MipsISD::PCKOD: return "MipsISD::PCKOD";
- case MipsISD::INSVE: return "MipsISD::INSVE";
- }
- return nullptr;
-}
-
MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI)
: TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) {
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 25a0bf9b797d5..7d1d8ff4cd128 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -18,6 +18,7 @@
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "Mips.h"
+#include "MipsSelectionDAGInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -50,217 +51,6 @@ class MipsTargetMachine;
class TargetLibraryInfo;
class TargetRegisterClass;
- namespace MipsISD {
-
- enum NodeType : unsigned {
- // Start the numbering from where ISD NodeType finishes.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Jump and link (call)
- JmpLink,
-
- // Tail call
- TailCall,
-
- // Get the Highest (63-48) 16 bits from a 64-bit immediate
- Highest,
-
- // Get the Higher (47-32) 16 bits from a 64-bit immediate
- Higher,
-
- // Get the High 16 bits from a 32/64-bit immediate
- // No relation with Mips Hi register
- Hi,
-
- // Get the Lower 16 bits from a 32/64-bit immediate
- // No relation with Mips Lo register
- Lo,
-
- // Get the High 16 bits from a 32 bit immediate for accessing the GOT.
- GotHi,
-
- // Get the High 16 bits from a 32-bit immediate for accessing TLS.
- TlsHi,
-
- // Handle gp_rel (small data/bss sections) relocation.
- GPRel,
-
- // Thread Pointer
- ThreadPointer,
-
- // Vector Floating Point Multiply and Subtract
- FMS,
-
- // Floating Point Branch Conditional
- FPBrcond,
-
- // Floating Point Compare
- FPCmp,
-
- // Floating point Abs
- FAbs,
-
- // Floating point select
- FSELECT,
-
- // Node used to generate an MTC1 i32 to f64 instruction
- MTC1_D64,
-
- // Floating Point Conditional Moves
- CMovFP_T,
- CMovFP_F,
-
- // FP-to-int truncation node.
- TruncIntFP,
-
- // Return
- Ret,
-
- // Interrupt, exception, error trap Return
- ERet,
-
- // Software Exception Return.
- EH_RETURN,
-
- // Node used to extract integer from accumulator.
- MFHI,
- MFLO,
-
- // Node used to insert integers to accumulator.
- MTLOHI,
-
- // Mult nodes.
- Mult,
- Multu,
-
- // MAdd/Sub nodes
- MAdd,
- MAddu,
- MSub,
- MSubu,
-
- // DivRem(u)
- DivRem,
- DivRemU,
- DivRem16,
- DivRemU16,
-
- BuildPairF64,
- ExtractElementF64,
-
- Wrapper,
-
- DynAlloc,
-
- Sync,
-
- Ext,
- Ins,
- CIns,
-
- // EXTR.W intrinsic nodes.
- EXTP,
- EXTPDP,
- EXTR_S_H,
- EXTR_W,
- EXTR_R_W,
- EXTR_RS_W,
- SHILO,
- MTHLIP,
-
- // DPA.W intrinsic nodes.
- MULSAQ_S_W_PH,
- MAQ_S_W_PHL,
- MAQ_S_W_PHR,
- MAQ_SA_W_PHL,
- MAQ_SA_W_PHR,
- DPAU_H_QBL,
- DPAU_H_QBR,
- DPSU_H_QBL,
- DPSU_H_QBR,
- DPAQ_S_W_PH,
- DPSQ_S_W_PH,
- DPAQ_SA_L_W,
- DPSQ_SA_L_W,
- DPA_W_PH,
- DPS_W_PH,
- DPAQX_S_W_PH,
- DPAQX_SA_W_PH,
- DPAX_W_PH,
- DPSX_W_PH,
- DPSQX_S_W_PH,
- DPSQX_SA_W_PH,
- MULSA_W_PH,
-
- MULT,
- MULTU,
- MADD_DSP,
- MADDU_DSP,
- MSUB_DSP,
- MSUBU_DSP,
-
- // DSP shift nodes.
- SHLL_DSP,
- SHRA_DSP,
- SHRL_DSP,
-
- // DSP setcc and select_cc nodes.
- SETCC_DSP,
- SELECT_CC_DSP,
-
- // Vector comparisons.
- // These take a vector and return a boolean.
- VALL_ZERO,
- VANY_ZERO,
- VALL_NONZERO,
- VANY_NONZERO,
-
- // These take a vector and return a vector bitmask.
- VCEQ,
- VCLE_S,
- VCLE_U,
- VCLT_S,
- VCLT_U,
-
- // Vector Shuffle with mask as an operand
- VSHF, // Generic shuffle
- SHF, // 4-element set shuffle.
- ILVEV, // Interleave even elements
- ILVOD, // Interleave odd elements
- ILVL, // Interleave left elements
- ILVR, // Interleave right elements
- PCKEV, // Pack even elements
- PCKOD, // Pack odd elements
-
- // Vector Lane Copy
- INSVE, // Copy element from one vector to another
-
- // Combined (XOR (OR $a, $b), -1)
- VNOR,
-
- // Extended vector element extraction
- VEXTRACT_SEXT_ELT,
- VEXTRACT_ZEXT_ELT,
-
- // Double select nodes for machines without conditional-move.
- DOUBLE_SELECT_I,
- DOUBLE_SELECT_I64,
-
- // Load/Store Left/Right nodes.
- FIRST_MEMORY_OPCODE,
- LWL = FIRST_MEMORY_OPCODE,
- LWR,
- SWL,
- SWR,
- LDL,
- LDR,
- SDL,
- SDR,
- LAST_MEMORY_OPCODE = SDR,
- };
-
- } // ene namespace MipsISD
-
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
@@ -330,10 +120,6 @@ class TargetRegisterClass;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const override;
- /// getTargetNodeName - This method returns the name of a target specific
- // DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
/// getSetCCResultType - get the ISD::SETCC result ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
diff --git a/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
index 6497ac5bb2df6..ea5656d93fc85 100644
--- a/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -7,13 +7,62 @@
//===----------------------------------------------------------------------===//
#include "MipsSelectionDAGInfo.h"
-#include "MipsISelLowering.h"
+
+#define GET_SDNODE_DESC
+#include "MipsGenSDNodeInfo.inc"
using namespace llvm;
+MipsSelectionDAGInfo::MipsSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(MipsGenSDNodeInfo) {}
+
MipsSelectionDAGInfo::~MipsSelectionDAGInfo() = default;
-bool MipsSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= MipsISD::FIRST_MEMORY_OPCODE &&
- Opcode <= MipsISD::LAST_MEMORY_OPCODE;
+const char *MipsSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<MipsISD::NodeType>(Opcode)) {
+ // clang-format off
+ case MipsISD::FAbs: return "MipsISD::FAbs";
+ case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
+ case MipsISD::DOUBLE_SELECT_I: return "MipsISD::DOUBLE_SELECT_I";
+ case MipsISD::DOUBLE_SELECT_I64: return "MipsISD::DOUBLE_SELECT_I64";
+ // clang-format on
+ }
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
+void MipsSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case MipsISD::ERet:
+ // invalid number of operands; expected at most 2, got 3
+ case MipsISD::DPAQX_SA_W_PH:
+ case MipsISD::DPAQX_S_W_PH:
+ case MipsISD::DPAQ_S_W_PH:
+ case MipsISD::DPAX_W_PH:
+ case MipsISD::DPA_W_PH:
+ case MipsISD::DPSQX_SA_W_PH:
+ case MipsISD::DPSQX_S_W_PH:
+ case MipsISD::DPSQ_S_W_PH:
+ case MipsISD::DPSX_W_PH:
+ case MipsISD::DPS_W_PH:
+ case MipsISD::MAQ_SA_W_PHL:
+ case MipsISD::MAQ_SA_W_PHR:
+ case MipsISD::MAQ_S_W_PHL:
+ case MipsISD::MAQ_S_W_PHR:
+ case MipsISD::MULSAQ_S_W_PH:
+ case MipsISD::MULSA_W_PH:
+ // operand #0 must have type i32, but has type v2i16
+ case MipsISD::DPAU_H_QBL:
+ case MipsISD::DPAU_H_QBR:
+ case MipsISD::DPSU_H_QBL:
+ case MipsISD::DPSU_H_QBR:
+ // operand #0 must have type i32, but has type v4i8
+ return;
+ }
+
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
diff --git a/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h b/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
index 934cd2e056595..6b3682648b575 100644
--- a/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -11,13 +11,35 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "MipsGenSDNodeInfo.inc"
+
namespace llvm {
+namespace MipsISD {
+
+enum NodeType : unsigned {
+ // Floating point Abs
+ FAbs = GENERATED_OPCODE_END,
+
+ DynAlloc,
+
+ // Double select nodes for machines without conditional-move.
+ DOUBLE_SELECT_I,
+ DOUBLE_SELECT_I64,
+};
+
+} // namespace MipsISD
-class MipsSelectionDAGInfo : public SelectionDAGTargetInfo {
+class MipsSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ MipsSelectionDAGInfo();
+
~MipsSelectionDAGInfo() override;
- bool isTargetMemoryOpcode(unsigned Opcode) const override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
};
} // namespace llvm
>From 6b3b1bde87e8c45274fed2cf3d21407bef2d745b Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:02:11 +0300
Subject: [PATCH 5/9] NVPTX
---
llvm/lib/Target/NVPTX/CMakeLists.txt | 1 +
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 +
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 106 ++--------------
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 114 ------------------
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2 +-
.../Target/NVPTX/NVPTXSelectionDAGInfo.cpp | 85 ++++++++++++-
llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h | 42 ++++++-
7 files changed, 134 insertions(+), 217 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index 693f0d0b35edc..f9c24750c4836 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -6,6 +6,7 @@ tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM NVPTXGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(NVPTXCommonTableGen)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index d525531766ddf..055f1ff47306d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -17,6 +17,7 @@
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
+#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3e44e47c56ad7..8fc3a68de6c79 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
@@ -1107,97 +1108,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
{MVT::i32, MVT::i128, MVT::v4f32, MVT::Other}, Custom);
}
-const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
-
-#define MAKE_CASE(V) \
- case V: \
- return #V;
-
- switch ((NVPTXISD::NodeType)Opcode) {
- case NVPTXISD::FIRST_NUMBER:
- break;
-
- MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
- MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
- MAKE_CASE(NVPTXISD::RET_GLUE)
- MAKE_CASE(NVPTXISD::DeclareArrayParam)
- MAKE_CASE(NVPTXISD::DeclareScalarParam)
- MAKE_CASE(NVPTXISD::CALL)
- MAKE_CASE(NVPTXISD::MoveParam)
- MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
- MAKE_CASE(NVPTXISD::BUILD_VECTOR)
- MAKE_CASE(NVPTXISD::CallPrototype)
- MAKE_CASE(NVPTXISD::ProxyReg)
- MAKE_CASE(NVPTXISD::LoadV2)
- MAKE_CASE(NVPTXISD::LoadV4)
- MAKE_CASE(NVPTXISD::LoadV8)
- MAKE_CASE(NVPTXISD::LDUV2)
- MAKE_CASE(NVPTXISD::LDUV4)
- MAKE_CASE(NVPTXISD::StoreV2)
- MAKE_CASE(NVPTXISD::StoreV4)
- MAKE_CASE(NVPTXISD::StoreV8)
- MAKE_CASE(NVPTXISD::FSHL_CLAMP)
- MAKE_CASE(NVPTXISD::FSHR_CLAMP)
- MAKE_CASE(NVPTXISD::BFI)
- MAKE_CASE(NVPTXISD::PRMT)
- MAKE_CASE(NVPTXISD::FCOPYSIGN)
- MAKE_CASE(NVPTXISD::FMAXNUM3)
- MAKE_CASE(NVPTXISD::FMINNUM3)
- MAKE_CASE(NVPTXISD::FMAXIMUM3)
- MAKE_CASE(NVPTXISD::FMINIMUM3)
- MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
- MAKE_CASE(NVPTXISD::STACKRESTORE)
- MAKE_CASE(NVPTXISD::STACKSAVE)
- MAKE_CASE(NVPTXISD::SETP_F16X2)
- MAKE_CASE(NVPTXISD::SETP_BF16X2)
- MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
- MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
- MAKE_CASE(NVPTXISD::BrxEnd)
- MAKE_CASE(NVPTXISD::BrxItem)
- MAKE_CASE(NVPTXISD::BrxStart)
- MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED)
- MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X)
- MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y)
- MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
- MAKE_CASE(
- NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
- MAKE_CASE(
- NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
- MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
- MAKE_CASE(
- NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
- MAKE_CASE(
- NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
- MAKE_CASE(NVPTXISD::CVT_E4M3X4_F32X4_RS_SF)
- MAKE_CASE(NVPTXISD::CVT_E5M2X4_F32X4_RS_SF)
- MAKE_CASE(NVPTXISD::CVT_E2M3X4_F32X4_RS_SF)
- MAKE_CASE(NVPTXISD::CVT_E3M2X4_F32X4_RS_SF)
- MAKE_CASE(NVPTXISD::CVT_E2M1X4_F32X4_RS_SF)
- }
- return nullptr;
-
-#undef MAKE_CASE
-}
-
TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
@@ -2032,7 +1942,7 @@ static ISD::NodeType getScalarOpcodeForReduction(unsigned ReductionOpcode) {
}
/// Get 3-input scalar reduction opcode
-static std::optional<NVPTXISD::NodeType>
+static std::optional<unsigned>
getScalar3OpcodeForReduction(unsigned ReductionOpcode) {
switch (ReductionOpcode) {
case ISD::VECREDUCE_FMAX:
@@ -2931,7 +2841,7 @@ static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
using NVPTX::PTXCvtMode::CvtMode;
auto [OpCode, RetTy, CvtModeFlag] =
- [&]() -> std::tuple<NVPTXISD::NodeType, MVT::SimpleValueType, uint32_t> {
+ [&]() -> std::tuple<unsigned, MVT::SimpleValueType, uint32_t> {
switch (IntrinsicID) {
case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8,
@@ -3314,7 +3224,7 @@ SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
// Generate BrxEnd nodes
SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
IdV, Chain.getValue(1)};
- SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps);
+ SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);
return BrxEnd;
}
@@ -5457,7 +5367,7 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDLoc DL(LD);
// the new opcode after we double the number of operands
- NVPTXISD::NodeType Opcode;
+ unsigned Opcode;
SmallVector<SDValue> Operands(LD->ops());
unsigned OldNumOutputs; // non-glue, non-chain outputs
switch (LD->getOpcode()) {
@@ -5540,7 +5450,7 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
auto *ST = cast<MemSDNode>(N);
// The new opcode after we double the number of operands.
- NVPTXISD::NodeType Opcode;
+ unsigned Opcode;
switch (N->getOpcode()) {
case ISD::STORE:
// Any packed type is legal, so the legalizer will not have lowered
@@ -5675,7 +5585,7 @@ static SDValue PerformFADDCombine(SDNode *N,
}
/// Get 3-input version of a 2-input min/max opcode
-static NVPTXISD::NodeType getMinMax3Opcode(unsigned MinMax2Opcode) {
+static unsigned getMinMax3Opcode(unsigned MinMax2Opcode) {
switch (MinMax2Opcode) {
case ISD::FMAXNUM:
case ISD::FMAXIMUMNUM:
@@ -5706,7 +5616,7 @@ static SDValue PerformFMinMaxCombine(SDNode *N,
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
unsigned MinMaxOp2 = N->getOpcode();
- NVPTXISD::NodeType MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
+ unsigned MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
if (Op0.getOpcode() == MinMaxOp2 && Op0.hasOneUse()) {
// (maxnum (maxnum a, b), c) -> (maxnum3 a, b, c)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 63fa0bb9159ff..d71a86fd463f6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -20,118 +20,6 @@
#include "llvm/Support/AtomicOrdering.h"
namespace llvm {
-namespace NVPTXISD {
-enum NodeType : unsigned {
- // Start the numbering from where ISD NodeType finishes.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- RET_GLUE,
-
- /// These nodes represent a parameter declaration. In PTX this will look like:
- /// .param .align 16 .b8 param0[1024];
- /// .param .b32 retval0;
- ///
- /// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
- /// DeclareScalarParam(Chain, Externalsym, Size, Glue)
- DeclareScalarParam,
- DeclareArrayParam,
-
- /// This node represents a PTX call instruction. It's operands are as follows:
- ///
- /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
- /// NumParams, Callee, Proto)
- CALL,
-
- MoveParam,
- CallPrototype,
- ProxyReg,
- FSHL_CLAMP,
- FSHR_CLAMP,
- MUL_WIDE_SIGNED,
- MUL_WIDE_UNSIGNED,
- SETP_F16X2,
- SETP_BF16X2,
- BFI,
- PRMT,
-
- /// This node is similar to ISD::BUILD_VECTOR except that the output may be
- /// implicitly bitcast to a scalar. This allows for the representation of
- /// packing move instructions for vector types which are not legal i.e. v2i32
- BUILD_VECTOR,
-
- /// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value
- /// which may be a scalar and unpacks it into multiple values by implicitly
- /// converting it to a vector.
- UNPACK_VECTOR,
-
- FCOPYSIGN,
- FMAXNUM3,
- FMINNUM3,
- FMAXIMUM3,
- FMINIMUM3,
-
- DYNAMIC_STACKALLOC,
- STACKRESTORE,
- STACKSAVE,
- BrxStart,
- BrxItem,
- BrxEnd,
- CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED,
- CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
- CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
- CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
- CVT_E4M3X4_F32X4_RS_SF,
- CVT_E5M2X4_F32X4_RS_SF,
- CVT_E2M3X4_F32X4_RS_SF,
- CVT_E3M2X4_F32X4_RS_SF,
- CVT_E2M1X4_F32X4_RS_SF,
-
- FIRST_MEMORY_OPCODE,
-
- /// These nodes are used to lower atomic instructions with i128 type. They are
- /// similar to the generic nodes, but the input and output values are split
- /// into two 64-bit values.
- /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
- /// swapLo, swapHi)
- /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
- ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
- ATOMIC_SWAP_B128,
-
- LoadV2,
- LoadV4,
- LoadV8,
- LDUV2, // LDU.v2
- LDUV4, // LDU.v4
- StoreV2,
- StoreV4,
- StoreV8,
- TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
- TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
- TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
- TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
- TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
- TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
- LAST_MEMORY_OPCODE =
- TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-};
-}
class NVPTXSubtarget;
@@ -144,8 +32,6 @@ class NVPTXTargetLowering : public TargetLowering {
const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- const char *getTargetNodeName(unsigned Opcode) const override;
-
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index f0bdf472b96ed..260d0d5494869 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1754,7 +1754,7 @@ def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype :
SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPSideEffect]>;
def ProtoIdent : Operand<i32> { let PrintMethod = "printProtoIdent"; }
def CALL_PROTOTYPE :
NVPTXInst<(outs), (ins ProtoIdent:$ident),
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
index d2035c6f8166f..05af7dac3fd78 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
@@ -7,13 +7,92 @@
//===----------------------------------------------------------------------===//
#include "NVPTXSelectionDAGInfo.h"
-#include "NVPTXISelLowering.h"
+
+#define GET_SDNODE_DESC
+#include "NVPTXGenSDNodeInfo.inc"
using namespace llvm;
+NVPTXSelectionDAGInfo::NVPTXSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(NVPTXGenSDNodeInfo) {}
+
NVPTXSelectionDAGInfo::~NVPTXSelectionDAGInfo() = default;
+const char *NVPTXSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V) \
+ case V: \
+ return #V;
+
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<NVPTXISD::NodeType>(Opcode)) {
+ MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
+ MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
+ MAKE_CASE(NVPTXISD::LoadV2)
+ MAKE_CASE(NVPTXISD::LoadV4)
+ MAKE_CASE(NVPTXISD::LoadV8)
+ MAKE_CASE(NVPTXISD::LDUV2)
+ MAKE_CASE(NVPTXISD::LDUV4)
+ MAKE_CASE(NVPTXISD::StoreV2)
+ MAKE_CASE(NVPTXISD::StoreV4)
+ MAKE_CASE(NVPTXISD::StoreV8)
+ MAKE_CASE(NVPTXISD::SETP_F16X2)
+ MAKE_CASE(NVPTXISD::SETP_BF16X2)
+ MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
+ }
+#undef MAKE_CASE
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
bool NVPTXSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE &&
- Opcode <= NVPTXISD::LAST_MEMORY_OPCODE;
+ // These nodes don't have corresponding entries in *.td files.
+ if (Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE &&
+ Opcode <= NVPTXISD::LAST_MEMORY_OPCODE)
+ return true;
+
+ // These nodes lack SDNPMemOperand property.
+ switch (Opcode) {
+ default:
+ break;
+ case NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT:
+ case NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT:
+ return true;
+ }
+
+ return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
+}
+
+void NVPTXSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case NVPTXISD::ProxyReg:
+ // invalid number of results; expected 2, got 1
+ return;
+ }
+
+ return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
index 9d69f48026c79..07c130baeaa4f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
@@ -11,13 +11,53 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "NVPTXGenSDNodeInfo.inc"
+
namespace llvm {
+namespace NVPTXISD {
+
+enum NodeType : unsigned {
+ SETP_F16X2 = GENERATED_OPCODE_END,
+ SETP_BF16X2,
+ UNPACK_VECTOR,
+
+ FIRST_MEMORY_OPCODE,
+
+ /// These nodes are used to lower atomic instructions with i128 type. They are
+ /// similar to the generic nodes, but the input and output values are split
+ /// into two 64-bit values.
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
+ /// swapLo, swapHi)
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
+ ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
+ ATOMIC_SWAP_B128,
+
+ LoadV2,
+ LoadV4,
+ LoadV8,
+ LDUV2, // LDU.v2
+ LDUV4, // LDU.v4
+ StoreV2,
+ StoreV4,
+ StoreV8,
+ LAST_MEMORY_OPCODE = StoreV8,
+};
+
+} // namespace NVPTXISD
-class NVPTXSelectionDAGInfo : public SelectionDAGTargetInfo {
+class NVPTXSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ NVPTXSelectionDAGInfo();
+
~NVPTXSelectionDAGInfo() override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
bool isTargetMemoryOpcode(unsigned Opcode) const override;
+
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
};
} // namespace llvm
>From fdf5d84a6fa5a7da6a522cadbaafcc606c6f7606 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:02:16 +0300
Subject: [PATCH 6/9] PowerPC
---
llvm/lib/Target/PowerPC/CMakeLists.txt | 1 +
llvm/lib/Target/PowerPC/PPCFastISel.cpp | 1 +
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 1 +
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 185 +-----
llvm/lib/Target/PowerPC/PPCISelLowering.h | 578 ------------------
llvm/lib/Target/PowerPC/PPCInstrInfo.td | 36 +-
.../Target/PowerPC/PPCSelectionDAGInfo.cpp | 66 +-
llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h | 57 +-
8 files changed, 138 insertions(+), 787 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 2182039e0eef8..53d565013c4bc 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -11,6 +11,7 @@ tablegen(LLVM PPCGenFastISel.inc -gen-fast-isel)
tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM PPCGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM PPCGenExegesis.inc -gen-exegesis)
tablegen(LLVM PPCGenRegisterBank.inc -gen-register-bank)
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index ea4e597d0fd7d..ca3fe18273ff5 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -17,6 +17,7 @@
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 89165fa8f8fdb..dd537c204cec1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,6 +16,7 @@
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f55336bafd251..220010c4d3d34 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -20,6 +20,7 @@
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
+#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
@@ -1678,190 +1679,6 @@ bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
return false;
}
-const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((PPCISD::NodeType)Opcode) {
- case PPCISD::FIRST_NUMBER: break;
- case PPCISD::FSEL: return "PPCISD::FSEL";
- case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
- case PPCISD::XSMINC: return "PPCISD::XSMINC";
- case PPCISD::FCFID: return "PPCISD::FCFID";
- case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
- case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
- case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
- case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
- case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
- case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
- case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
- case PPCISD::FRE: return "PPCISD::FRE";
- case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
- case PPCISD::FTSQRT:
- return "PPCISD::FTSQRT";
- case PPCISD::FSQRT:
- return "PPCISD::FSQRT";
- case PPCISD::STFIWX: return "PPCISD::STFIWX";
- case PPCISD::VPERM: return "PPCISD::VPERM";
- case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
- case PPCISD::XXSPLTI_SP_TO_DP:
- return "PPCISD::XXSPLTI_SP_TO_DP";
- case PPCISD::XXSPLTI32DX:
- return "PPCISD::XXSPLTI32DX";
- case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
- case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
- case PPCISD::XXPERM:
- return "PPCISD::XXPERM";
- case PPCISD::VECSHL: return "PPCISD::VECSHL";
- case PPCISD::VSRQ:
- return "PPCISD::VSRQ";
- case PPCISD::CMPB: return "PPCISD::CMPB";
- case PPCISD::Hi: return "PPCISD::Hi";
- case PPCISD::Lo: return "PPCISD::Lo";
- case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
- case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
- case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
- case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
- case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
- case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
- case PPCISD::SRL: return "PPCISD::SRL";
- case PPCISD::SRA: return "PPCISD::SRA";
- case PPCISD::SHL: return "PPCISD::SHL";
- case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
- case PPCISD::CALL: return "PPCISD::CALL";
- case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
- case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
- case PPCISD::CALL_RM:
- return "PPCISD::CALL_RM";
- case PPCISD::CALL_NOP_RM:
- return "PPCISD::CALL_NOP_RM";
- case PPCISD::CALL_NOTOC_RM:
- return "PPCISD::CALL_NOTOC_RM";
- case PPCISD::MTCTR: return "PPCISD::MTCTR";
- case PPCISD::BCTRL: return "PPCISD::BCTRL";
- case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
- case PPCISD::BCTRL_RM:
- return "PPCISD::BCTRL_RM";
- case PPCISD::BCTRL_LOAD_TOC_RM:
- return "PPCISD::BCTRL_LOAD_TOC_RM";
- case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
- case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
- case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
- case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
- case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
- case PPCISD::MFVSR: return "PPCISD::MFVSR";
- case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
- case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
- case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
- case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
- case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
- return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
- case PPCISD::ANDI_rec_1_EQ_BIT:
- return "PPCISD::ANDI_rec_1_EQ_BIT";
- case PPCISD::ANDI_rec_1_GT_BIT:
- return "PPCISD::ANDI_rec_1_GT_BIT";
- case PPCISD::VCMP: return "PPCISD::VCMP";
- case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
- case PPCISD::LBRX: return "PPCISD::LBRX";
- case PPCISD::STBRX: return "PPCISD::STBRX";
- case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
- case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
- case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
- case PPCISD::STXSIX: return "PPCISD::STXSIX";
- case PPCISD::VEXTS: return "PPCISD::VEXTS";
- case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
- case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
- case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
- case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
- case PPCISD::ST_VSR_SCAL_INT:
- return "PPCISD::ST_VSR_SCAL_INT";
- case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
- case PPCISD::BDNZ: return "PPCISD::BDNZ";
- case PPCISD::BDZ: return "PPCISD::BDZ";
- case PPCISD::MFFS: return "PPCISD::MFFS";
- case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
- case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
- case PPCISD::CR6SET: return "PPCISD::CR6SET";
- case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
- case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
- case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
- case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
- case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
- case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
- case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
- case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
- case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
- case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
- case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
- case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
- case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
- case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
- case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
- case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
- case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
- case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
- case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
- case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
- case PPCISD::PADDI_DTPREL:
- return "PPCISD::PADDI_DTPREL";
- case PPCISD::VADD_SPLAT:
- return "PPCISD::VADD_SPLAT";
- case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
- case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
- case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
- case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
- case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
- case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
- case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
- case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
- case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
- case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
- return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
- case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
- return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
- case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
- case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
- case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
- case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
- case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
- case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
- case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
- case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
- case PPCISD::STRICT_FADDRTZ:
- return "PPCISD::STRICT_FADDRTZ";
- case PPCISD::STRICT_FCTIDZ:
- return "PPCISD::STRICT_FCTIDZ";
- case PPCISD::STRICT_FCTIWZ:
- return "PPCISD::STRICT_FCTIWZ";
- case PPCISD::STRICT_FCTIDUZ:
- return "PPCISD::STRICT_FCTIDUZ";
- case PPCISD::STRICT_FCTIWUZ:
- return "PPCISD::STRICT_FCTIWUZ";
- case PPCISD::STRICT_FCFID:
- return "PPCISD::STRICT_FCFID";
- case PPCISD::STRICT_FCFIDU:
- return "PPCISD::STRICT_FCFIDU";
- case PPCISD::STRICT_FCFIDS:
- return "PPCISD::STRICT_FCFIDS";
- case PPCISD::STRICT_FCFIDUS:
- return "PPCISD::STRICT_FCFIDUS";
- case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
- case PPCISD::STORE_COND:
- return "PPCISD::STORE_COND";
- case PPCISD::SETBC:
- return "PPCISD::SETBC";
- case PPCISD::SETBCR:
- return "PPCISD::SETBCR";
- case PPCISD::ADDC:
- return "PPCISD::ADDC";
- case PPCISD::ADDE:
- return "PPCISD::ADDE";
- case PPCISD::SUBC:
- return "PPCISD::SUBC";
- case PPCISD::SUBE:
- return "PPCISD::SUBE";
- }
- return nullptr;
-}
-
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
EVT VT) const {
if (!VT.isVector())
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index d967018982734..680b529b4e2e5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -34,580 +34,6 @@
namespace llvm {
- namespace PPCISD {
-
- // When adding a NEW PPCISD node please add it to the correct position in
- // the enum. The order of elements in this enum matters!
- // Values that are added between FIRST_MEMORY_OPCODE and LAST_MEMORY_OPCODE
- // are considered memory opcodes and are treated differently than other
- // entries.
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- /// FSEL - Traditional three-operand fsel node.
- ///
- FSEL,
-
- /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
- XSMAXC,
- XSMINC,
-
- /// FCFID - The FCFID instruction, taking an f64 operand and producing
- /// and f64 value containing the FP representation of the integer that
- /// was temporarily in the f64 operand.
- FCFID,
-
- /// Newer FCFID[US] integer-to-floating-point conversion instructions for
- /// unsigned integers and single-precision outputs.
- FCFIDU,
- FCFIDS,
- FCFIDUS,
-
- /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
- /// operand, producing an f64 value containing the integer representation
- /// of that FP value.
- FCTIDZ,
- FCTIWZ,
-
- /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
- /// unsigned integers with round toward zero.
- FCTIDUZ,
- FCTIWUZ,
-
- /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
- /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
- VEXTS,
-
- /// Reciprocal estimate instructions (unary FP ops).
- FRE,
- FRSQRTE,
-
- /// Test instruction for software square root.
- FTSQRT,
-
- /// Square root instruction.
- FSQRT,
-
- /// VPERM - The PPC VPERM Instruction.
- ///
- VPERM,
-
- /// XXSPLT - The PPC VSX splat instructions
- ///
- XXSPLT,
-
- /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for
- /// converting immediate single precision numbers to double precision
- /// vector or scalar.
- XXSPLTI_SP_TO_DP,
-
- /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
- ///
- XXSPLTI32DX,
-
- /// VECINSERT - The PPC vector insert instruction
- ///
- VECINSERT,
-
- /// VECSHL - The PPC vector shift left instruction
- ///
- VECSHL,
-
- /// XXPERMDI - The PPC XXPERMDI instruction
- ///
- XXPERMDI,
- XXPERM,
-
- /// The CMPB instruction (takes two operands of i32 or i64).
- CMPB,
-
- /// Hi/Lo - These represent the high and low 16-bit parts of a global
- /// address respectively. These nodes have two operands, the first of
- /// which must be a TargetGlobalAddress, and the second of which must be a
- /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
- /// though these are usually folded into other nodes.
- Hi,
- Lo,
-
- /// The following two target-specific nodes are used for calls through
- /// function pointers in the 64-bit SVR4 ABI.
-
- /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
- /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
- /// compute an allocation on the stack.
- DYNALLOC,
-
- /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
- /// compute an offset from native SP to the address of the most recent
- /// dynamic alloca.
- DYNAREAOFFSET,
-
- /// To avoid stack clash, allocation is performed by block and each block is
- /// probed.
- PROBED_ALLOCA,
-
- /// The result of the mflr at function entry, used for PIC code.
- GlobalBaseReg,
-
- /// These nodes represent PPC shifts.
- ///
- /// For scalar types, only the last `n + 1` bits of the shift amounts
- /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
- /// for exact behaviors.
- ///
- /// For vector types, only the last n bits are used. See vsld.
- SRL,
- SRA,
- SHL,
-
- /// These nodes represent PPC arithmetic operations with carry.
- ADDC,
- ADDE,
- SUBC,
- SUBE,
-
- /// FNMSUB - Negated multiply-subtract instruction.
- FNMSUB,
-
- /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
- /// word and shift left immediate.
- EXTSWSLI,
-
- /// The combination of sra[wd]i and addze used to implemented signed
- /// integer division by a power of 2. The first operand is the dividend,
- /// and the second is the constant shift amount (representing the
- /// divisor).
- SRA_ADDZE,
-
- /// CALL - A direct function call.
- /// CALL_NOP is a call with the special NOP which follows 64-bit
- /// CALL_NOTOC the caller does not use the TOC.
- /// SVR4 calls and 32-bit/64-bit AIX calls.
- CALL,
- CALL_NOP,
- CALL_NOTOC,
-
- /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
- /// MTCTR instruction.
- MTCTR,
-
- /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
- /// BCTRL instruction.
- BCTRL,
-
- /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
- /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
- /// and 64-bit AIX.
- BCTRL_LOAD_TOC,
-
- /// The variants that implicitly define rounding mode for calls with
- /// strictfp semantics.
- CALL_RM,
- CALL_NOP_RM,
- CALL_NOTOC_RM,
- BCTRL_RM,
- BCTRL_LOAD_TOC_RM,
-
- /// Return with a glue operand, matched by 'blr'
- RET_GLUE,
-
- /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
- /// This copies the bits corresponding to the specified CRREG into the
- /// resultant GPR. Bits corresponding to other CR regs are undefined.
- MFOCRF,
-
- /// Direct move from a VSX register to a GPR
- MFVSR,
-
- /// Direct move from a GPR to a VSX register (algebraic)
- MTVSRA,
-
- /// Direct move from a GPR to a VSX register (zero)
- MTVSRZ,
-
- /// Direct move of 2 consecutive GPR to a VSX register.
- BUILD_FP128,
-
- /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
- /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
- /// unsupported for this target.
- /// Merge 2 GPRs to a single SPE register.
- BUILD_SPE64,
-
- /// Extract SPE register component, second argument is high or low.
- EXTRACT_SPE,
-
- /// Extract a subvector from signed integer vector and convert to FP.
- /// It is primarily used to convert a (widened) illegal integer vector
- /// type to a legal floating point vector type.
- /// For example v2i32 -> widened to v4i32 -> v2f64
- SINT_VEC_TO_FP,
-
- /// Extract a subvector from unsigned integer vector and convert to FP.
- /// As with SINT_VEC_TO_FP, used for converting illegal types.
- UINT_VEC_TO_FP,
-
- /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
- /// place the value into the least significant element of the most
- /// significant doubleword in the vector. This is not element zero for
- /// anything smaller than a doubleword on either endianness. This node has
- /// the same semantics as SCALAR_TO_VECTOR except that the value remains in
- /// the aforementioned location in the vector register.
- SCALAR_TO_VECTOR_PERMUTED,
-
- // FIXME: Remove these once the ANDI glue bug is fixed:
- /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
- /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
- /// implement truncation of i32 or i64 to i1.
- ANDI_rec_1_EQ_BIT,
- ANDI_rec_1_GT_BIT,
-
- // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
- // target (returns (Lo, Hi)). It takes a chain operand.
- READ_TIME_BASE,
-
- // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
- EH_SJLJ_SETJMP,
-
- // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
- EH_SJLJ_LONGJMP,
-
- /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
- /// instructions. For lack of better number, we use the opcode number
- /// encoding for the OPC field to identify the compare. For example, 838
- /// is VCMPGTSH.
- VCMP,
-
- /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the
- /// altivec VCMP*_rec instructions. For lack of better number, we use the
- /// opcode number encoding for the OPC field to identify the compare. For
- /// example, 838 is VCMPGTSH.
- VCMP_rec,
-
- /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
- /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
- /// condition register to branch on, OPC is the branch opcode to use (e.g.
- /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
- /// an optional input flag argument.
- COND_BRANCH,
-
- /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
- /// loops.
- BDNZ,
- BDZ,
-
- /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
- /// towards zero. Used only as part of the long double-to-int
- /// conversion sequence.
- FADDRTZ,
-
- /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
- MFFS,
-
- /// TC_RETURN - A tail call return.
- /// operand #0 chain
- /// operand #1 callee (register or absolute)
- /// operand #2 stack adjustment
- /// operand #3 optional in flag
- TC_RETURN,
-
- /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
- CR6SET,
- CR6UNSET,
-
- /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
- /// for non-position independent code on PPC32.
- PPC32_GOT,
-
- /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
- /// local dynamic TLS and position indendepent code on PPC32.
- PPC32_PICGOT,
-
- /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
- /// TLS model, produces an ADDIS8 instruction that adds the GOT
- /// base to sym\@got\@tprel\@ha.
- ADDIS_GOT_TPREL_HA,
-
- /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
- /// TLS model, produces a LD instruction with base register G8RReg
- /// and offset sym\@got\@tprel\@l. This completes the addition that
- /// finds the offset of "sym" relative to the thread pointer.
- LD_GOT_TPREL_L,
-
- /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec
- /// and local-exec TLS models, produces an ADD instruction that adds
- /// the contents of G8RReg to the thread pointer. Symbol contains a
- /// relocation sym\@tls which is to be replaced by the thread pointer
- /// and identifies to the linker that the instruction is part of a
- /// TLS sequence.
- ADD_TLS,
-
- /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds the GOT base
- /// register to sym\@got\@tlsgd\@ha.
- ADDIS_TLSGD_HA,
-
- /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
- /// ADDIS_TLSGD_L_ADDR until after register assignment.
- ADDI_TLSGD_L,
-
- /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
- /// ADDIS_TLSGD_L_ADDR until after register assignment.
- GET_TLS_ADDR,
-
- /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on
- /// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread
- /// pointer. At the end of the call, the thread pointer is found in R3.
- GET_TPOINTER,
-
- /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
- /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
- /// register assignment.
- ADDI_TLSGD_L_ADDR,
-
- /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
- /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
- /// Op that combines two register copies of TOC entries
- /// (region handle into R3 and variable offset into R4) followed by a
- /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.
- /// This node is used in 64-bit mode as well (in which case the result is
- /// G8RC and inputs are X3/X4).
- TLSGD_AIX,
-
- /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
- /// produces a call to .__tls_get_mod(_$TLSML\@ml).
- GET_TLS_MOD_AIX,
-
- /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)
- /// Op that requires a single input of the module handle TOC entry in R3,
- /// and generates a GET_TLS_MOD_AIX node which will be expanded into a call
- /// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes.
- /// The only difference is the register class.
- TLSLD_AIX,
-
- /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds the GOT base
- /// register to sym\@got\@tlsld\@ha.
- ADDIS_TLSLD_HA,
-
- /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
- /// ADDIS_TLSLD_L_ADDR until after register assignment.
- ADDI_TLSLD_L,
-
- /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
- /// ADDIS_TLSLD_L_ADDR until after register assignment.
- GET_TLSLD_ADDR,
-
- /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
- /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
- /// following register assignment.
- ADDI_TLSLD_L_ADDR,
-
- /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds X3 to
- /// sym\@dtprel\@ha.
- ADDIS_DTPREL_HA,
-
- /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@dtprel\@l.
- ADDI_DTPREL_L,
-
- /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS
- /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.
- PADDI_DTPREL,
-
- /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
- /// during instruction selection to optimize a BUILD_VECTOR into
- /// operations on splats. This is necessary to avoid losing these
- /// optimizations due to constant folding.
- VADD_SPLAT,
-
- /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
- /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
- /// or stxvd2x instruction. The chain is necessary because the
- /// sequence replaces a load and needs to provide the same number
- /// of outputs.
- XXSWAPD,
-
- /// An SDNode for swaps that are not associated with any loads/stores
- /// and thereby have no chain.
- SWAP_NO_CHAIN,
-
- /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
- /// lower (IDX=1) half of v4f32 to v2f64.
- FP_EXTEND_HALF,
-
- /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done
- /// either through an add like PADDI or through a PC Relative load like
- /// PLD.
- MAT_PCREL_ADDR,
-
- /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
- /// TLS global address when using dynamic access models. This can be done
- /// through an add like PADDI.
- TLS_DYNAMIC_MAT_PCREL_ADDR,
-
- /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address
- /// when using local exec access models, and when prefixed instructions are
- /// available. This is used with ADD_TLS to produce an add like PADDI.
- TLS_LOCAL_EXEC_MAT_ADDR,
-
- /// ACC_BUILD = Build an accumulator register from 4 VSX registers.
- ACC_BUILD,
-
- /// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
- PAIR_BUILD,
-
- /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
- /// an accumulator or pair register. This node is needed because
- /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same
- /// element type.
- EXTRACT_VSX_REG,
-
- /// XXMFACC = This corresponds to the xxmfacc instruction.
- XXMFACC,
-
- // Constrained conversion from floating point to int
- FIRST_STRICTFP_OPCODE,
- STRICT_FCTIDZ = FIRST_STRICTFP_OPCODE,
- STRICT_FCTIWZ,
- STRICT_FCTIDUZ,
- STRICT_FCTIWUZ,
-
- /// Constrained integer-to-floating-point conversion instructions.
- STRICT_FCFID,
- STRICT_FCFIDU,
- STRICT_FCFIDS,
- STRICT_FCFIDUS,
-
- /// Constrained floating point add in round-to-zero mode.
- STRICT_FADDRTZ,
- LAST_STRICTFP_OPCODE = STRICT_FADDRTZ,
-
- /// SETBC - The ISA 3.1 (P10) SETBC instruction.
- SETBC,
-
- /// SETBCR - The ISA 3.1 (P10) SETBCR instruction.
- SETBCR,
-
- /// VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction
- VSRQ,
-
- // NOTE: The nodes below may require PC-Rel specific patterns if the
- // address could be PC-Relative. When adding new nodes below, consider
- // whether or not the address can be PC-Relative and add the corresponding
- // PC-relative patterns and tests.
-
- /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
- /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
- /// the GPRC input, then stores it through Ptr. Type can be either i16 or
- /// i32.
- FIRST_MEMORY_OPCODE,
- STBRX = FIRST_MEMORY_OPCODE,
-
- /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
- /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
- /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
- /// or i32.
- LBRX,
-
- /// STFIWX - The STFIWX instruction. The first operand is an input token
- /// chain, then an f64 value to store, then an address to store it to.
- STFIWX,
-
- /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
- /// load which sign-extends from a 32-bit integer value into the
- /// destination 64-bit register.
- LFIWAX,
-
- /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
- /// load which zero-extends from a 32-bit integer value into the
- /// destination 64-bit register.
- LFIWZX,
-
- /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
- /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
- /// This can be used for converting loaded integers to floating point.
- LXSIZX,
-
- /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
- /// chain, then an f64 value to store, then an address to store it to,
- /// followed by a byte-width for the store.
- STXSIX,
-
- /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
- /// Maps directly to an lxvd2x instruction that will be followed by
- /// an xxswapd.
- LXVD2X,
-
- /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
- /// This node represents v1i128 BUILD_VECTOR of a zero extending load
- /// instruction from <byte, halfword, word, or doubleword> to i128.
- /// Allows utilization of the Load VSX Vector Rightmost Instructions.
- LXVRZX,
-
- /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
- /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
- /// the vector type to load vector in big-endian element order.
- LOAD_VEC_BE,
-
- /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
- /// v2f32 value into the lower half of a VSR register.
- LD_VSX_LH,
-
- /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
- /// instructions such as LXVDSX, LXVWSX.
- LD_SPLAT,
-
- /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
- /// that zero-extends.
- ZEXT_LD_SPLAT,
-
- /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
- /// that sign-extends.
- SEXT_LD_SPLAT,
-
- /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
- /// Maps directly to an stxvd2x instruction that will be preceded by
- /// an xxswapd.
- STXVD2X,
-
- /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
- /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
- /// the vector type to store vector in big-endian element order.
- STORE_VEC_BE,
-
- /// Store scalar integers from VSR.
- ST_VSR_SCAL_INT,
-
- /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
- /// except they ensure that the compare input is zero-extended for
- /// sub-word versions because the atomic loads zero-extend.
- ATOMIC_CMP_SWAP_8,
- ATOMIC_CMP_SWAP_16,
-
- /// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr
- /// The store conditional instruction ST[BHWD]ARX that produces a glue
- /// result to attach it to a conditional branch.
- STORE_COND,
-
- /// GPRC = TOC_ENTRY GA, TOC
- /// Loads the entry for GA from the TOC, where the TOC base is given by
- /// the last operand.
- TOC_ENTRY,
- LAST_MEMORY_OPCODE = TOC_ENTRY,
- };
-
- } // end namespace PPCISD
-
/// Define some predicates that are used for node matching.
namespace PPC {
@@ -752,10 +178,6 @@ namespace llvm {
explicit PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI);
- /// getTargetNodeName() - This method returns the name of a target specific
- /// DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
bool isSelectSupported(SelectSupportKind Kind) const override {
// PowerPC does not support scalar condition selects on vectors.
return (Kind != SelectSupportKind::ScalarCondVectorVal);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index f3998113ddd52..ba730ed394317 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -163,14 +163,16 @@ def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCvsrq: SDNode<"PPCISD::VSRQ", SDT_PPCVecShiftQuad, []>;
-def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID",
- SDTFPUnaryOp, [SDNPHasChain]>;
-def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU",
- SDTFPUnaryOp, [SDNPHasChain]>;
-def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS",
- SDTFPRoundOp, [SDNPHasChain]>;
-def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS",
- SDTFPRoundOp, [SDNPHasChain]>;
+let IsStrictFP = true in {
+ def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+ def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+ def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS",
+ SDTFPRoundOp, [SDNPHasChain]>;
+ def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS",
+ SDTFPRoundOp, [SDNPHasChain]>;
+}
def PPCany_fcfid : PatFrags<(ops node:$op),
[(PPCfcfid node:$op),
@@ -207,6 +209,8 @@ def PPCmffs : SDNode<"PPCISD::MFFS",
// Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+
+let IsStrictFP = true in
def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp,
[SDNPHasChain]>;
@@ -272,14 +276,16 @@ def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>;
def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
-def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ",
- SDTFPUnaryOp, [SDNPHasChain]>;
-def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ",
- SDTFPUnaryOp, [SDNPHasChain]>;
-def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ",
- SDTFPUnaryOp, [SDNPHasChain]>;
-def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ",
+let IsStrictFP = true in {
+ def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+ def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ",
SDTFPUnaryOp, [SDNPHasChain]>;
+ def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+ def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+}
def PPCany_fctidz : PatFrags<(ops node:$op),
[(PPCstrict_fctidz node:$op),
diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index 93a4693c50168..80aa1122167df 100644
--- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -7,20 +7,72 @@
//===----------------------------------------------------------------------===//
#include "PPCSelectionDAGInfo.h"
-#include "PPCISelLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+#define GET_SDNODE_DESC
+#include "PPCGenSDNodeInfo.inc"
using namespace llvm;
+PPCSelectionDAGInfo::PPCSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(PPCGenSDNodeInfo) {}
+
PPCSelectionDAGInfo::~PPCSelectionDAGInfo() = default;
-bool PPCSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= PPCISD::FIRST_MEMORY_OPCODE &&
- Opcode <= PPCISD::LAST_MEMORY_OPCODE;
+const char *PPCSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+ switch (static_cast<PPCISD::NodeType>(Opcode)) {
+ case PPCISD::GlobalBaseReg:
+ return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRA_ADDZE:
+ return "PPCISD::SRA_ADDZE";
+ case PPCISD::READ_TIME_BASE:
+ return "PPCISD::READ_TIME_BASE";
+ case PPCISD::MFOCRF:
+ return "PPCISD::MFOCRF";
+ case PPCISD::ANDI_rec_1_EQ_BIT:
+ return "PPCISD::ANDI_rec_1_EQ_BIT";
+ case PPCISD::ANDI_rec_1_GT_BIT:
+ return "PPCISD::ANDI_rec_1_GT_BIT";
+ case PPCISD::BDNZ:
+ return "PPCISD::BDNZ";
+ case PPCISD::BDZ:
+ return "PPCISD::BDZ";
+ case PPCISD::PPC32_PICGOT:
+ return "PPCISD::PPC32_PICGOT";
+ case PPCISD::VADD_SPLAT:
+ return "PPCISD::VADD_SPLAT";
+ }
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
}
-bool PPCSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
- return Opcode >= PPCISD::FIRST_STRICTFP_OPCODE &&
- Opcode <= PPCISD::LAST_STRICTFP_OPCODE;
+void PPCSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case PPCISD::DYNAREAOFFSET:
+ // invalid number of results; expected 2, got 1
+ case PPCISD::TOC_ENTRY:
+ // invalid number of results; expected 1, got 2
+ case PPCISD::STORE_COND:
+ // invalid number of results; expected 2, got 3
+ case PPCISD::LD_SPLAT:
+ case PPCISD::SEXT_LD_SPLAT:
+ case PPCISD::ZEXT_LD_SPLAT:
+ // invalid number of operands; expected 2, got 3
+ case PPCISD::ST_VSR_SCAL_INT:
+ // invalid number of operands; expected 4, got 5
+ case PPCISD::XXPERM:
+ // operand #1 must have type v2f64, but has type v16i8
+ case PPCISD::ACC_BUILD:
+ // operand #3 must have type v4i32, but has type v16i8
+ case PPCISD::PAIR_BUILD:
+ // operand #1 must have type v4i32, but has type v16i8
+ return;
+ }
+
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemcmp(
diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index f962a7a5321aa..ffe8982ce1af4 100644
--- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -11,15 +11,66 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "PPCGenSDNodeInfo.inc"
+
namespace llvm {
+namespace PPCISD {
+
+enum NodeType : unsigned {
+ /// The result of the mflr at function entry, used for PIC code.
+ GlobalBaseReg = GENERATED_OPCODE_END,
+
+ /// The combination of sra[wd]i and addze used to implemented signed
+ /// integer division by a power of 2. The first operand is the dividend,
+ /// and the second is the constant shift amount (representing the
+ /// divisor).
+ SRA_ADDZE,
+
+ /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFOCRF,
+
+ // FIXME: Remove these once the ANDI glue bug is fixed:
+ /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
+ /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
+ /// implement truncation of i32 or i64 to i1.
+ ANDI_rec_1_EQ_BIT,
+ ANDI_rec_1_GT_BIT,
+
+ // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
+ // target (returns (Lo, Hi)). It takes a chain operand.
+ READ_TIME_BASE,
-class PPCSelectionDAGInfo : public SelectionDAGTargetInfo {
+ /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
+ /// loops.
+ BDNZ,
+ BDZ,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+ /// local dynamic TLS and position indendepent code on PPC32.
+ PPC32_PICGOT,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
+};
+
+} // namespace PPCISD
+
+class PPCSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ PPCSelectionDAGInfo();
+
~PPCSelectionDAGInfo() override;
- bool isTargetMemoryOpcode(unsigned Opcode) const override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
- bool isTargetStrictFPOpcode(unsigned Opcode) const override;
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
std::pair<SDValue, SDValue>
EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
>From 8f6a099d1e448e814176d17cff54390cc49483b0 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:02:30 +0300
Subject: [PATCH 7/9] SystemZ
---
llvm/lib/Target/SystemZ/CMakeLists.txt | 1 +
.../Target/SystemZ/SystemZISelLowering.cpp | 147 -------
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 386 ------------------
llvm/lib/Target/SystemZ/SystemZOperators.td | 46 ++-
.../SystemZ/SystemZSelectionDAGInfo.cpp | 20 +-
.../Target/SystemZ/SystemZSelectionDAGInfo.h | 29 +-
6 files changed, 65 insertions(+), 564 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/CMakeLists.txt b/llvm/lib/Target/SystemZ/CMakeLists.txt
index 0d8f3eac6ee4f..6d94a755322df 100644
--- a/llvm/lib/Target/SystemZ/CMakeLists.txt
+++ b/llvm/lib/Target/SystemZ/CMakeLists.txt
@@ -11,6 +11,7 @@ tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM SystemZGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(SystemZCommonTableGen)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 58109acc92015..dfd76f9b0427f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -7423,153 +7423,6 @@ SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
return LowerOperationWrapper(N, Results, DAG);
}
-const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
-#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
- switch ((SystemZISD::NodeType)Opcode) {
- case SystemZISD::FIRST_NUMBER: break;
- OPCODE(RET_GLUE);
- OPCODE(CALL);
- OPCODE(SIBCALL);
- OPCODE(TLS_GDCALL);
- OPCODE(TLS_LDCALL);
- OPCODE(PCREL_WRAPPER);
- OPCODE(PCREL_OFFSET);
- OPCODE(ICMP);
- OPCODE(FCMP);
- OPCODE(STRICT_FCMP);
- OPCODE(STRICT_FCMPS);
- OPCODE(TM);
- OPCODE(BR_CCMASK);
- OPCODE(SELECT_CCMASK);
- OPCODE(ADJDYNALLOC);
- OPCODE(PROBED_ALLOCA);
- OPCODE(POPCNT);
- OPCODE(SMUL_LOHI);
- OPCODE(UMUL_LOHI);
- OPCODE(SDIVREM);
- OPCODE(UDIVREM);
- OPCODE(SADDO);
- OPCODE(SSUBO);
- OPCODE(UADDO);
- OPCODE(USUBO);
- OPCODE(ADDCARRY);
- OPCODE(SUBCARRY);
- OPCODE(GET_CCMASK);
- OPCODE(MVC);
- OPCODE(NC);
- OPCODE(OC);
- OPCODE(XC);
- OPCODE(CLC);
- OPCODE(MEMSET_MVC);
- OPCODE(STPCPY);
- OPCODE(STRCMP);
- OPCODE(SEARCH_STRING);
- OPCODE(IPM);
- OPCODE(TBEGIN);
- OPCODE(TBEGIN_NOFLOAT);
- OPCODE(TEND);
- OPCODE(BYTE_MASK);
- OPCODE(ROTATE_MASK);
- OPCODE(REPLICATE);
- OPCODE(JOIN_DWORDS);
- OPCODE(SPLAT);
- OPCODE(MERGE_HIGH);
- OPCODE(MERGE_LOW);
- OPCODE(SHL_DOUBLE);
- OPCODE(PERMUTE_DWORDS);
- OPCODE(PERMUTE);
- OPCODE(PACK);
- OPCODE(PACKS_CC);
- OPCODE(PACKLS_CC);
- OPCODE(UNPACK_HIGH);
- OPCODE(UNPACKL_HIGH);
- OPCODE(UNPACK_LOW);
- OPCODE(UNPACKL_LOW);
- OPCODE(VSHL_BY_SCALAR);
- OPCODE(VSRL_BY_SCALAR);
- OPCODE(VSRA_BY_SCALAR);
- OPCODE(VROTL_BY_SCALAR);
- OPCODE(SHL_DOUBLE_BIT);
- OPCODE(SHR_DOUBLE_BIT);
- OPCODE(VSUM);
- OPCODE(VACC);
- OPCODE(VSCBI);
- OPCODE(VAC);
- OPCODE(VSBI);
- OPCODE(VACCC);
- OPCODE(VSBCBI);
- OPCODE(VMAH);
- OPCODE(VMALH);
- OPCODE(VME);
- OPCODE(VMLE);
- OPCODE(VMO);
- OPCODE(VMLO);
- OPCODE(VICMPE);
- OPCODE(VICMPH);
- OPCODE(VICMPHL);
- OPCODE(VICMPES);
- OPCODE(VICMPHS);
- OPCODE(VICMPHLS);
- OPCODE(VFCMPE);
- OPCODE(STRICT_VFCMPE);
- OPCODE(STRICT_VFCMPES);
- OPCODE(VFCMPH);
- OPCODE(STRICT_VFCMPH);
- OPCODE(STRICT_VFCMPHS);
- OPCODE(VFCMPHE);
- OPCODE(STRICT_VFCMPHE);
- OPCODE(STRICT_VFCMPHES);
- OPCODE(VFCMPES);
- OPCODE(VFCMPHS);
- OPCODE(VFCMPHES);
- OPCODE(VFTCI);
- OPCODE(VEXTEND);
- OPCODE(STRICT_VEXTEND);
- OPCODE(VROUND);
- OPCODE(STRICT_VROUND);
- OPCODE(VTM);
- OPCODE(SCMP128HI);
- OPCODE(UCMP128HI);
- OPCODE(VFAE_CC);
- OPCODE(VFAEZ_CC);
- OPCODE(VFEE_CC);
- OPCODE(VFEEZ_CC);
- OPCODE(VFENE_CC);
- OPCODE(VFENEZ_CC);
- OPCODE(VISTR_CC);
- OPCODE(VSTRC_CC);
- OPCODE(VSTRCZ_CC);
- OPCODE(VSTRS_CC);
- OPCODE(VSTRSZ_CC);
- OPCODE(TDC);
- OPCODE(ATOMIC_SWAPW);
- OPCODE(ATOMIC_LOADW_ADD);
- OPCODE(ATOMIC_LOADW_SUB);
- OPCODE(ATOMIC_LOADW_AND);
- OPCODE(ATOMIC_LOADW_OR);
- OPCODE(ATOMIC_LOADW_XOR);
- OPCODE(ATOMIC_LOADW_NAND);
- OPCODE(ATOMIC_LOADW_MIN);
- OPCODE(ATOMIC_LOADW_MAX);
- OPCODE(ATOMIC_LOADW_UMIN);
- OPCODE(ATOMIC_LOADW_UMAX);
- OPCODE(ATOMIC_CMP_SWAPW);
- OPCODE(ATOMIC_CMP_SWAP);
- OPCODE(ATOMIC_LOAD_128);
- OPCODE(ATOMIC_STORE_128);
- OPCODE(ATOMIC_CMP_SWAP_128);
- OPCODE(LRV);
- OPCODE(STRV);
- OPCODE(VLER);
- OPCODE(VSTER);
- OPCODE(STCKF);
- OPCODE(PREFETCH);
- OPCODE(ADA_ENTRY);
- }
- return nullptr;
-#undef OPCODE
-}
-
// Return true if VT is a vector whose elements are a whole number of bytes
// in width. Also check for presence of vector support.
bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index d5b76031766dd..13a1cd1614a53 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -22,390 +22,6 @@
#include <optional>
namespace llvm {
-namespace SystemZISD {
-enum NodeType : unsigned {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Return with a glue operand. Operand 0 is the chain operand.
- RET_GLUE,
-
- // Calls a function. Operand 0 is the chain operand and operand 1
- // is the target address. The arguments start at operand 2.
- // There is an optional glue operand at the end.
- CALL,
- SIBCALL,
-
- // TLS calls. Like regular calls, except operand 1 is the TLS symbol.
- // (The call target is implicitly __tls_get_offset.)
- TLS_GDCALL,
- TLS_LDCALL,
-
- // Wraps a TargetGlobalAddress that should be loaded using PC-relative
- // accesses (LARL). Operand 0 is the address.
- PCREL_WRAPPER,
-
- // Used in cases where an offset is applied to a TargetGlobalAddress.
- // Operand 0 is the full TargetGlobalAddress and operand 1 is a
- // PCREL_WRAPPER for an anchor point. This is used so that we can
- // cheaply refer to either the full address or the anchor point
- // as a register base.
- PCREL_OFFSET,
-
- // Integer comparisons. There are three operands: the two values
- // to compare, and an integer of type SystemZICMP.
- ICMP,
-
- // Floating-point comparisons. The two operands are the values to compare.
- FCMP,
-
- // Test under mask. The first operand is ANDed with the second operand
- // and the condition codes are set on the result. The third operand is
- // a boolean that is true if the condition codes need to distinguish
- // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
- // register forms do but the memory forms don't).
- TM,
-
- // Branches if a condition is true. Operand 0 is the chain operand;
- // operand 1 is the 4-bit condition-code mask, with bit N in
- // big-endian order meaning "branch if CC=N"; operand 2 is the
- // target block and operand 3 is the flag operand.
- BR_CCMASK,
-
- // Selects between operand 0 and operand 1. Operand 2 is the
- // mask of condition-code values for which operand 0 should be
- // chosen over operand 1; it has the same form as BR_CCMASK.
- // Operand 3 is the flag operand.
- SELECT_CCMASK,
-
- // Evaluates to the gap between the stack pointer and the
- // base of the dynamically-allocatable area.
- ADJDYNALLOC,
-
- // For allocating stack space when using stack clash protector.
- // Allocation is performed by block, and each block is probed.
- PROBED_ALLOCA,
-
- // Count number of bits set in operand 0 per byte.
- POPCNT,
-
- // Wrappers around the ISD opcodes of the same name. The output is GR128.
- // Input operands may be GR64 or GR32, depending on the instruction.
- SMUL_LOHI,
- UMUL_LOHI,
- SDIVREM,
- UDIVREM,
-
- // Add/subtract with overflow/carry. These have the same operands as
- // the corresponding standard operations, except with the carry flag
- // replaced by a condition code value.
- SADDO, SSUBO, UADDO, USUBO, ADDCARRY, SUBCARRY,
-
- // Set the condition code from a boolean value in operand 0.
- // Operand 1 is a mask of all condition-code values that may result of this
- // operation, operand 2 is a mask of condition-code values that may result
- // if the boolean is true.
- // Note that this operation is always optimized away, we will never
- // generate any code for it.
- GET_CCMASK,
-
- // Use a series of MVCs to copy bytes from one memory location to another.
- // The operands are:
- // - the target address
- // - the source address
- // - the constant length
- //
- // This isn't a memory opcode because we'd need to attach two
- // MachineMemOperands rather than one.
- MVC,
-
- // Similar to MVC, but for logic operations (AND, OR, XOR).
- NC,
- OC,
- XC,
-
- // Use CLC to compare two blocks of memory, with the same comments
- // as for MVC.
- CLC,
-
- // Use MVC to set a block of memory after storing the first byte.
- MEMSET_MVC,
-
- // Use an MVST-based sequence to implement stpcpy().
- STPCPY,
-
- // Use a CLST-based sequence to implement strcmp(). The two input operands
- // are the addresses of the strings to compare.
- STRCMP,
-
- // Use an SRST-based sequence to search a block of memory. The first
- // operand is the end address, the second is the start, and the third
- // is the character to search for. CC is set to 1 on success and 2
- // on failure.
- SEARCH_STRING,
-
- // Store the CC value in bits 29 and 28 of an integer.
- IPM,
-
- // Transaction begin. The first operand is the chain, the second
- // the TDB pointer, and the third the immediate control field.
- // Returns CC value and chain.
- TBEGIN,
- TBEGIN_NOFLOAT,
-
- // Transaction end. Just the chain operand. Returns CC value and chain.
- TEND,
-
- // Create a vector constant by filling byte N of the result with bit
- // 15-N of the single operand.
- BYTE_MASK,
-
- // Create a vector constant by replicating an element-sized RISBG-style mask.
- // The first operand specifies the starting set bit and the second operand
- // specifies the ending set bit. Both operands count from the MSB of the
- // element.
- ROTATE_MASK,
-
- // Replicate a GPR scalar value into all elements of a vector.
- REPLICATE,
-
- // Create a vector from two i64 GPRs.
- JOIN_DWORDS,
-
- // Replicate one element of a vector into all elements. The first operand
- // is the vector and the second is the index of the element to replicate.
- SPLAT,
-
- // Interleave elements from the high half of operand 0 and the high half
- // of operand 1.
- MERGE_HIGH,
-
- // Likewise for the low halves.
- MERGE_LOW,
-
- // Concatenate the vectors in the first two operands, shift them left
- // by the third operand, and take the first half of the result.
- SHL_DOUBLE,
-
- // Take one element of the first v2i64 operand and the one element of
- // the second v2i64 operand and concatenate them to form a v2i64 result.
- // The third operand is a 4-bit value of the form 0A0B, where A and B
- // are the element selectors for the first operand and second operands
- // respectively.
- PERMUTE_DWORDS,
-
- // Perform a general vector permute on vector operands 0 and 1.
- // Each byte of operand 2 controls the corresponding byte of the result,
- // in the same way as a byte-level VECTOR_SHUFFLE mask.
- PERMUTE,
-
- // Pack vector operands 0 and 1 into a single vector with half-sized elements.
- PACK,
-
- // Likewise, but saturate the result and set CC. PACKS_CC does signed
- // saturation and PACKLS_CC does unsigned saturation.
- PACKS_CC,
- PACKLS_CC,
-
- // Unpack the first half of vector operand 0 into double-sized elements.
- // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
- UNPACK_HIGH,
- UNPACKL_HIGH,
-
- // Likewise for the second half.
- UNPACK_LOW,
- UNPACKL_LOW,
-
- // Shift/rotate each element of vector operand 0 by the number of bits
- // specified by scalar operand 1.
- VSHL_BY_SCALAR,
- VSRL_BY_SCALAR,
- VSRA_BY_SCALAR,
- VROTL_BY_SCALAR,
-
- // Concatenate the vectors in the first two operands, shift them left/right
- // bitwise by the third operand, and take the first/last half of the result.
- SHL_DOUBLE_BIT,
- SHR_DOUBLE_BIT,
-
- // For each element of the output type, sum across all sub-elements of
- // operand 0 belonging to the corresponding element, and add in the
- // rightmost sub-element of the corresponding element of operand 1.
- VSUM,
-
- // Compute carry/borrow indication for add/subtract.
- VACC, VSCBI,
- // Add/subtract with carry/borrow.
- VAC, VSBI,
- // Compute carry/borrow indication for add/subtract with carry/borrow.
- VACCC, VSBCBI,
-
- // High-word multiply-and-add.
- VMAH, VMALH,
- // Widen and multiply even/odd vector elements.
- VME, VMLE, VMO, VMLO,
-
- // Compare integer vector operands 0 and 1 to produce the usual 0/-1
- // vector result. VICMPE is for equality, VICMPH for "signed greater than"
- // and VICMPHL for "unsigned greater than".
- VICMPE,
- VICMPH,
- VICMPHL,
-
- // Likewise, but also set the condition codes on the result.
- VICMPES,
- VICMPHS,
- VICMPHLS,
-
- // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
- // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
- // greater than" and VFCMPHE for "ordered and greater than or equal to".
- VFCMPE,
- VFCMPH,
- VFCMPHE,
-
- // Likewise, but also set the condition codes on the result.
- VFCMPES,
- VFCMPHS,
- VFCMPHES,
-
- // Test floating-point data class for vectors.
- VFTCI,
-
- // Extend the even f32 elements of vector operand 0 to produce a vector
- // of f64 elements.
- VEXTEND,
-
- // Round the f64 elements of vector operand 0 to f32s and store them in the
- // even elements of the result.
- VROUND,
-
- // AND the two vector operands together and set CC based on the result.
- VTM,
-
- // i128 high integer comparisons.
- SCMP128HI,
- UCMP128HI,
-
- // String operations that set CC as a side-effect.
- VFAE_CC,
- VFAEZ_CC,
- VFEE_CC,
- VFEEZ_CC,
- VFENE_CC,
- VFENEZ_CC,
- VISTR_CC,
- VSTRC_CC,
- VSTRCZ_CC,
- VSTRS_CC,
- VSTRSZ_CC,
-
- // Test Data Class.
- //
- // Operand 0: the value to test
- // Operand 1: the bit mask
- TDC,
-
- // z/OS XPLINK ADA Entry
- // Wraps a TargetGlobalAddress that should be loaded from a function's
- // AssociatedData Area (ADA). Tha ADA is passed to the function by the
- // caller in the XPLink ABI defined register R5.
- // Operand 0: the GlobalValue/External Symbol
- // Operand 1: the ADA register
- // Operand 2: the offset (0 for the first and 8 for the second element in the
- // function descriptor)
- ADA_ENTRY,
-
- // Strict variants of scalar floating-point comparisons.
- // Quiet and signaling versions.
- FIRST_STRICTFP_OPCODE,
- STRICT_FCMP = FIRST_STRICTFP_OPCODE,
- STRICT_FCMPS,
-
- // Strict variants of vector floating-point comparisons.
- // Quiet and signaling versions.
- STRICT_VFCMPE,
- STRICT_VFCMPH,
- STRICT_VFCMPHE,
- STRICT_VFCMPES,
- STRICT_VFCMPHS,
- STRICT_VFCMPHES,
-
- // Strict variants of VEXTEND and VROUND.
- STRICT_VEXTEND,
- STRICT_VROUND,
- LAST_STRICTFP_OPCODE = STRICT_VROUND,
-
- // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
- // ATOMIC_LOAD_<op>.
- //
- // Operand 0: the address of the containing 32-bit-aligned field
- // Operand 1: the second operand of <op>, in the high bits of an i32
- // for everything except ATOMIC_SWAPW
- // Operand 2: how many bits to rotate the i32 left to bring the first
- // operand into the high bits
- // Operand 3: the negative of operand 2, for rotating the other way
- // Operand 4: the width of the field in bits (8 or 16)
- FIRST_MEMORY_OPCODE,
- ATOMIC_SWAPW = FIRST_MEMORY_OPCODE,
- ATOMIC_LOADW_ADD,
- ATOMIC_LOADW_SUB,
- ATOMIC_LOADW_AND,
- ATOMIC_LOADW_OR,
- ATOMIC_LOADW_XOR,
- ATOMIC_LOADW_NAND,
- ATOMIC_LOADW_MIN,
- ATOMIC_LOADW_MAX,
- ATOMIC_LOADW_UMIN,
- ATOMIC_LOADW_UMAX,
-
- // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
- //
- // Operand 0: the address of the containing 32-bit-aligned field
- // Operand 1: the compare value, in the low bits of an i32
- // Operand 2: the swap value, in the low bits of an i32
- // Operand 3: how many bits to rotate the i32 left to bring the first
- // operand into the high bits
- // Operand 4: the negative of operand 2, for rotating the other way
- // Operand 5: the width of the field in bits (8 or 16)
- ATOMIC_CMP_SWAPW,
-
- // Atomic compare-and-swap returning CC value.
- // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
- ATOMIC_CMP_SWAP,
-
- // 128-bit atomic load.
- // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr)
- ATOMIC_LOAD_128,
-
- // 128-bit atomic store.
- // OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr)
- ATOMIC_STORE_128,
-
- // 128-bit atomic compare-and-swap.
- // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
- ATOMIC_CMP_SWAP_128,
-
- // Byte swapping load/store. Same operands as regular load/store.
- LRV, STRV,
-
- // Element swapping load/store. Same operands as regular load/store.
- VLER, VSTER,
-
- // Use STORE CLOCK FAST to store current TOD clock value.
- STCKF,
-
- // Prefetch from the second operand using the 4-bit control code in
- // the first operand. The code is 1 for a load prefetch and 2 for
- // a store prefetch.
- PREFETCH,
- LAST_MEMORY_OPCODE = PREFETCH,
-};
-
-// Return true if OPCODE is some kind of PC-relative address.
-inline bool isPCREL(unsigned Opcode) {
- return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
-}
-} // end namespace SystemZISD
namespace SystemZICMP {
// Describes whether an integer comparison needs to be signed or unsigned,
@@ -532,8 +148,6 @@ class SystemZTargetLowering : public TargetLowering {
return true;
}
- const char *getTargetNodeName(unsigned Opcode) const override;
-
// This function currently returns cost for srl/ipm/cc sequence for merging.
CondMergingParams
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 547d3dcf92804..df67c07420621 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -285,10 +285,14 @@ def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
SDT_ZWrapOffset, []>;
def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>;
def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>;
-def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp,
- [SDNPHasChain]>;
-def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp,
- [SDNPHasChain]>;
+
+let IsStrictFP = true in {
+ def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp,
+ [SDNPHasChain]>;
+ def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp,
+ [SDNPHasChain]>;
+}
+
def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>;
def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
[SDNPHasChain]>;
@@ -382,29 +386,33 @@ def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecCompareCC>;
def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecCompareCC>;
def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecCompareCC>;
def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
-def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE",
- SDT_ZVecBinaryConv, [SDNPHasChain]>;
-def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES",
- SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
-def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH",
- SDT_ZVecBinaryConv, [SDNPHasChain]>;
-def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS",
- SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
-def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE",
- SDT_ZVecBinaryConv, [SDNPHasChain]>;
-def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES",
- SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>;
def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>;
def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>;
def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
-def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND",
- SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
-def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND",
+
+let IsStrictFP = true in {
+ def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+ def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+ def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+ def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+ def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+ def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+ def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND",
+ SDT_ZVecUnaryConv, [SDNPHasChain]>;
+ def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND",
SDT_ZVecUnaryConv, [SDNPHasChain]>;
+}
+
def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
def z_scmp128hi : SDNode<"SystemZISD::SCMP128HI", SDT_ZCmp>;
def z_ucmp128hi : SDNode<"SystemZISD::UCMP128HI", SDT_ZCmp>;
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index eb00d484af693..88feba8adce0e 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -10,21 +10,27 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZSelectionDAGInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#define GET_SDNODE_DESC
+#include "SystemZGenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "systemz-selectiondag-info"
-bool SystemZSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= SystemZISD::FIRST_MEMORY_OPCODE &&
- Opcode <= SystemZISD::LAST_MEMORY_OPCODE;
-}
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(SystemZGenSDNodeInfo) {}
+
+const char *SystemZSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+ switch (static_cast<SystemZISD::NodeType>(Opcode)) {
+ case SystemZISD::GET_CCMASK:
+ return "SystemZISD::GET_CCMASK";
+ }
-bool SystemZSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
- return Opcode >= SystemZISD::FIRST_STRICTFP_OPCODE &&
- Opcode <= SystemZISD::LAST_STRICTFP_OPCODE;
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
}
static unsigned getMemMemLenAdj(unsigned Op) {
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 200566f9646c1..d25fddab65161 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -15,15 +15,34 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "SystemZGenSDNodeInfo.inc"
+
namespace llvm {
+namespace SystemZISD {
+
+enum NodeType : unsigned {
+ // Set the condition code from a boolean value in operand 0.
+ // Operand 1 is a mask of all condition-code values that may result of this
+ // operation, operand 2 is a mask of condition-code values that may result
+ // if the boolean is true.
+ // Note that this operation is always optimized away, we will never
+ // generate any code for it.
+ GET_CCMASK = GENERATED_OPCODE_END,
+};
-class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo {
-public:
- explicit SystemZSelectionDAGInfo() = default;
+// Return true if OPCODE is some kind of PC-relative address.
+inline bool isPCREL(unsigned Opcode) {
+ return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
+}
- bool isTargetMemoryOpcode(unsigned Opcode) const override;
+} // namespace SystemZISD
+
+class SystemZSelectionDAGInfo : public SelectionDAGGenTargetInfo {
+public:
+ SystemZSelectionDAGInfo();
- bool isTargetStrictFPOpcode(unsigned Opcode) const override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Dst, SDValue Src,
>From 10933f146ae4001e7a2b9d80d93aa82133143d6c Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:02:48 +0300
Subject: [PATCH 8/9] X86
---
llvm/lib/Target/X86/CMakeLists.txt | 1 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 478 +--------
llvm/lib/Target/X86/X86ISelLowering.h | 982 +------------------
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 6 +-
llvm/lib/Target/X86/X86InstrFragments.td | 11 +-
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 52 +-
llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 77 +-
llvm/lib/Target/X86/X86SelectionDAGInfo.h | 56 +-
8 files changed, 185 insertions(+), 1478 deletions(-)
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index f9bd233cf8ecf..407772197b24a 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -17,6 +17,7 @@ tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info
tablegen(LLVM X86GenMnemonicTables.inc -gen-x86-mnemonic-tables -asmwriternum=1)
tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank)
tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM X86GenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables -asmwriternum=1)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 621f1868d3311..7db2cda6eba46 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19499,9 +19499,9 @@ static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
}
if (!Ret) {
- X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC
- : LocalDynamic ? X86ISD::TLSBASEADDR
- : X86ISD::TLSADDR;
+ unsigned CallType = UseTLSDESC ? X86ISD::TLSDESC
+ : LocalDynamic ? X86ISD::TLSBASEADDR
+ : X86ISD::TLSADDR;
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
if (LoadGlobalBaseReg) {
@@ -29386,7 +29386,7 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
APInt PreferredZero = APInt::getZero(SizeInBits);
APInt OppositeZero = PreferredZero;
EVT IVT = VT.changeTypeToInteger();
- X86ISD::NodeType MinMaxOp;
+ unsigned MinMaxOp;
if (IsMaxOp) {
MinMaxOp = X86ISD::FMAX;
OppositeZero.setSignBit();
@@ -34914,476 +34914,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((X86ISD::NodeType)Opcode) {
- case X86ISD::FIRST_NUMBER: break;
-#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
- NODE_NAME_CASE(BSF)
- NODE_NAME_CASE(BSR)
- NODE_NAME_CASE(FSHL)
- NODE_NAME_CASE(FSHR)
- NODE_NAME_CASE(FAND)
- NODE_NAME_CASE(FANDN)
- NODE_NAME_CASE(FOR)
- NODE_NAME_CASE(FXOR)
- NODE_NAME_CASE(FILD)
- NODE_NAME_CASE(FIST)
- NODE_NAME_CASE(FP_TO_INT_IN_MEM)
- NODE_NAME_CASE(FLD)
- NODE_NAME_CASE(FST)
- NODE_NAME_CASE(CALL)
- NODE_NAME_CASE(CALL_RVMARKER)
- NODE_NAME_CASE(IMP_CALL)
- NODE_NAME_CASE(BT)
- NODE_NAME_CASE(CMP)
- NODE_NAME_CASE(FCMP)
- NODE_NAME_CASE(STRICT_FCMP)
- NODE_NAME_CASE(STRICT_FCMPS)
- NODE_NAME_CASE(COMI)
- NODE_NAME_CASE(UCOMI)
- NODE_NAME_CASE(COMX)
- NODE_NAME_CASE(UCOMX)
- NODE_NAME_CASE(CMPM)
- NODE_NAME_CASE(CMPMM)
- NODE_NAME_CASE(STRICT_CMPM)
- NODE_NAME_CASE(CMPMM_SAE)
- NODE_NAME_CASE(SETCC)
- NODE_NAME_CASE(SETCC_CARRY)
- NODE_NAME_CASE(FSETCC)
- NODE_NAME_CASE(FSETCCM)
- NODE_NAME_CASE(FSETCCM_SAE)
- NODE_NAME_CASE(CMOV)
- NODE_NAME_CASE(BRCOND)
- NODE_NAME_CASE(RET_GLUE)
- NODE_NAME_CASE(IRET)
- NODE_NAME_CASE(REP_STOS)
- NODE_NAME_CASE(REP_MOVS)
- NODE_NAME_CASE(GlobalBaseReg)
- NODE_NAME_CASE(Wrapper)
- NODE_NAME_CASE(WrapperRIP)
- NODE_NAME_CASE(MOVQ2DQ)
- NODE_NAME_CASE(MOVDQ2Q)
- NODE_NAME_CASE(MMX_MOVD2W)
- NODE_NAME_CASE(MMX_MOVW2D)
- NODE_NAME_CASE(PEXTRB)
- NODE_NAME_CASE(PEXTRW)
- NODE_NAME_CASE(INSERTPS)
- NODE_NAME_CASE(PINSRB)
- NODE_NAME_CASE(PINSRW)
- NODE_NAME_CASE(PSHUFB)
- NODE_NAME_CASE(ANDNP)
- NODE_NAME_CASE(BLENDI)
- NODE_NAME_CASE(BLENDV)
- NODE_NAME_CASE(HADD)
- NODE_NAME_CASE(HSUB)
- NODE_NAME_CASE(FHADD)
- NODE_NAME_CASE(FHSUB)
- NODE_NAME_CASE(CONFLICT)
- NODE_NAME_CASE(FMAX)
- NODE_NAME_CASE(FMAXS)
- NODE_NAME_CASE(FMAX_SAE)
- NODE_NAME_CASE(FMAXS_SAE)
- NODE_NAME_CASE(STRICT_FMAX)
- NODE_NAME_CASE(FMIN)
- NODE_NAME_CASE(FMINS)
- NODE_NAME_CASE(FMIN_SAE)
- NODE_NAME_CASE(FMINS_SAE)
- NODE_NAME_CASE(STRICT_FMIN)
- NODE_NAME_CASE(FMAXC)
- NODE_NAME_CASE(FMINC)
- NODE_NAME_CASE(FRSQRT)
- NODE_NAME_CASE(FRCP)
- NODE_NAME_CASE(EXTRQI)
- NODE_NAME_CASE(INSERTQI)
- NODE_NAME_CASE(TLSADDR)
- NODE_NAME_CASE(TLSBASEADDR)
- NODE_NAME_CASE(TLSCALL)
- NODE_NAME_CASE(TLSDESC)
- NODE_NAME_CASE(EH_SJLJ_SETJMP)
- NODE_NAME_CASE(EH_SJLJ_LONGJMP)
- NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
- NODE_NAME_CASE(EH_RETURN)
- NODE_NAME_CASE(TC_RETURN)
- NODE_NAME_CASE(FNSTCW16m)
- NODE_NAME_CASE(FLDCW16m)
- NODE_NAME_CASE(FNSTENVm)
- NODE_NAME_CASE(FLDENVm)
- NODE_NAME_CASE(LCMPXCHG_DAG)
- NODE_NAME_CASE(LCMPXCHG8_DAG)
- NODE_NAME_CASE(LCMPXCHG16_DAG)
- NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
- NODE_NAME_CASE(LADD)
- NODE_NAME_CASE(LSUB)
- NODE_NAME_CASE(LOR)
- NODE_NAME_CASE(LXOR)
- NODE_NAME_CASE(LAND)
- NODE_NAME_CASE(LBTS)
- NODE_NAME_CASE(LBTC)
- NODE_NAME_CASE(LBTR)
- NODE_NAME_CASE(LBTS_RM)
- NODE_NAME_CASE(LBTC_RM)
- NODE_NAME_CASE(LBTR_RM)
- NODE_NAME_CASE(AADD)
- NODE_NAME_CASE(AOR)
- NODE_NAME_CASE(AXOR)
- NODE_NAME_CASE(AAND)
- NODE_NAME_CASE(VZEXT_MOVL)
- NODE_NAME_CASE(VZEXT_LOAD)
- NODE_NAME_CASE(VEXTRACT_STORE)
- NODE_NAME_CASE(VTRUNC)
- NODE_NAME_CASE(VTRUNCS)
- NODE_NAME_CASE(VTRUNCUS)
- NODE_NAME_CASE(VMTRUNC)
- NODE_NAME_CASE(VMTRUNCS)
- NODE_NAME_CASE(VMTRUNCUS)
- NODE_NAME_CASE(VTRUNCSTORES)
- NODE_NAME_CASE(VTRUNCSTOREUS)
- NODE_NAME_CASE(VMTRUNCSTORES)
- NODE_NAME_CASE(VMTRUNCSTOREUS)
- NODE_NAME_CASE(VFPEXT)
- NODE_NAME_CASE(STRICT_VFPEXT)
- NODE_NAME_CASE(VFPEXT_SAE)
- NODE_NAME_CASE(VFPEXTS)
- NODE_NAME_CASE(VFPEXTS_SAE)
- NODE_NAME_CASE(VFPROUND)
- NODE_NAME_CASE(VFPROUND2)
- NODE_NAME_CASE(VFPROUND2_RND)
- NODE_NAME_CASE(STRICT_VFPROUND)
- NODE_NAME_CASE(VMFPROUND)
- NODE_NAME_CASE(VFPROUND_RND)
- NODE_NAME_CASE(VFPROUNDS)
- NODE_NAME_CASE(VFPROUNDS_RND)
- NODE_NAME_CASE(VSHLDQ)
- NODE_NAME_CASE(VSRLDQ)
- NODE_NAME_CASE(VSHL)
- NODE_NAME_CASE(VSRL)
- NODE_NAME_CASE(VSRA)
- NODE_NAME_CASE(VSHLI)
- NODE_NAME_CASE(VSRLI)
- NODE_NAME_CASE(VSRAI)
- NODE_NAME_CASE(VSHLV)
- NODE_NAME_CASE(VSRLV)
- NODE_NAME_CASE(VSRAV)
- NODE_NAME_CASE(VROTLI)
- NODE_NAME_CASE(VROTRI)
- NODE_NAME_CASE(VPPERM)
- NODE_NAME_CASE(CMPP)
- NODE_NAME_CASE(STRICT_CMPP)
- NODE_NAME_CASE(PCMPEQ)
- NODE_NAME_CASE(PCMPGT)
- NODE_NAME_CASE(PHMINPOS)
- NODE_NAME_CASE(ADD)
- NODE_NAME_CASE(SUB)
- NODE_NAME_CASE(ADC)
- NODE_NAME_CASE(SBB)
- NODE_NAME_CASE(SMUL)
- NODE_NAME_CASE(UMUL)
- NODE_NAME_CASE(OR)
- NODE_NAME_CASE(XOR)
- NODE_NAME_CASE(AND)
- NODE_NAME_CASE(BEXTR)
- NODE_NAME_CASE(BEXTRI)
- NODE_NAME_CASE(BZHI)
- NODE_NAME_CASE(PDEP)
- NODE_NAME_CASE(PEXT)
- NODE_NAME_CASE(MUL_IMM)
- NODE_NAME_CASE(MOVMSK)
- NODE_NAME_CASE(PTEST)
- NODE_NAME_CASE(TESTP)
- NODE_NAME_CASE(KORTEST)
- NODE_NAME_CASE(KTEST)
- NODE_NAME_CASE(KADD)
- NODE_NAME_CASE(KSHIFTL)
- NODE_NAME_CASE(KSHIFTR)
- NODE_NAME_CASE(PACKSS)
- NODE_NAME_CASE(PACKUS)
- NODE_NAME_CASE(PALIGNR)
- NODE_NAME_CASE(VALIGN)
- NODE_NAME_CASE(VSHLD)
- NODE_NAME_CASE(VSHRD)
- NODE_NAME_CASE(PSHUFD)
- NODE_NAME_CASE(PSHUFHW)
- NODE_NAME_CASE(PSHUFLW)
- NODE_NAME_CASE(SHUFP)
- NODE_NAME_CASE(SHUF128)
- NODE_NAME_CASE(MOVLHPS)
- NODE_NAME_CASE(MOVHLPS)
- NODE_NAME_CASE(MOVDDUP)
- NODE_NAME_CASE(MOVSHDUP)
- NODE_NAME_CASE(MOVSLDUP)
- NODE_NAME_CASE(MOVSD)
- NODE_NAME_CASE(MOVSS)
- NODE_NAME_CASE(MOVSH)
- NODE_NAME_CASE(UNPCKL)
- NODE_NAME_CASE(UNPCKH)
- NODE_NAME_CASE(VBROADCAST)
- NODE_NAME_CASE(VBROADCAST_LOAD)
- NODE_NAME_CASE(VBROADCASTM)
- NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
- NODE_NAME_CASE(VPERMILPV)
- NODE_NAME_CASE(VPERMILPI)
- NODE_NAME_CASE(VPERM2X128)
- NODE_NAME_CASE(VPERMV)
- NODE_NAME_CASE(VPERMV3)
- NODE_NAME_CASE(VPERMI)
- NODE_NAME_CASE(VPTERNLOG)
- NODE_NAME_CASE(FP_TO_SINT_SAT)
- NODE_NAME_CASE(FP_TO_UINT_SAT)
- NODE_NAME_CASE(VFIXUPIMM)
- NODE_NAME_CASE(VFIXUPIMM_SAE)
- NODE_NAME_CASE(VFIXUPIMMS)
- NODE_NAME_CASE(VFIXUPIMMS_SAE)
- NODE_NAME_CASE(VRANGE)
- NODE_NAME_CASE(VRANGE_SAE)
- NODE_NAME_CASE(VRANGES)
- NODE_NAME_CASE(VRANGES_SAE)
- NODE_NAME_CASE(PMULUDQ)
- NODE_NAME_CASE(PMULDQ)
- NODE_NAME_CASE(PSADBW)
- NODE_NAME_CASE(DBPSADBW)
- NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
- NODE_NAME_CASE(VAARG_64)
- NODE_NAME_CASE(VAARG_X32)
- NODE_NAME_CASE(DYN_ALLOCA)
- NODE_NAME_CASE(MFENCE)
- NODE_NAME_CASE(SEG_ALLOCA)
- NODE_NAME_CASE(PROBED_ALLOCA)
- NODE_NAME_CASE(RDRAND)
- NODE_NAME_CASE(RDSEED)
- NODE_NAME_CASE(RDPKRU)
- NODE_NAME_CASE(WRPKRU)
- NODE_NAME_CASE(VPMADDUBSW)
- NODE_NAME_CASE(VPMADDWD)
- NODE_NAME_CASE(VPSHA)
- NODE_NAME_CASE(VPSHL)
- NODE_NAME_CASE(VPCOM)
- NODE_NAME_CASE(VPCOMU)
- NODE_NAME_CASE(VPERMIL2)
- NODE_NAME_CASE(FMSUB)
- NODE_NAME_CASE(STRICT_FMSUB)
- NODE_NAME_CASE(FNMADD)
- NODE_NAME_CASE(STRICT_FNMADD)
- NODE_NAME_CASE(FNMSUB)
- NODE_NAME_CASE(STRICT_FNMSUB)
- NODE_NAME_CASE(FMADDSUB)
- NODE_NAME_CASE(FMSUBADD)
- NODE_NAME_CASE(FMADD_RND)
- NODE_NAME_CASE(FNMADD_RND)
- NODE_NAME_CASE(FMSUB_RND)
- NODE_NAME_CASE(FNMSUB_RND)
- NODE_NAME_CASE(FMADDSUB_RND)
- NODE_NAME_CASE(FMSUBADD_RND)
- NODE_NAME_CASE(VFMADDC)
- NODE_NAME_CASE(VFMADDC_RND)
- NODE_NAME_CASE(VFCMADDC)
- NODE_NAME_CASE(VFCMADDC_RND)
- NODE_NAME_CASE(VFMULC)
- NODE_NAME_CASE(VFMULC_RND)
- NODE_NAME_CASE(VFCMULC)
- NODE_NAME_CASE(VFCMULC_RND)
- NODE_NAME_CASE(VFMULCSH)
- NODE_NAME_CASE(VFMULCSH_RND)
- NODE_NAME_CASE(VFCMULCSH)
- NODE_NAME_CASE(VFCMULCSH_RND)
- NODE_NAME_CASE(VFMADDCSH)
- NODE_NAME_CASE(VFMADDCSH_RND)
- NODE_NAME_CASE(VFCMADDCSH)
- NODE_NAME_CASE(VFCMADDCSH_RND)
- NODE_NAME_CASE(VPMADD52H)
- NODE_NAME_CASE(VPMADD52L)
- NODE_NAME_CASE(VRNDSCALE)
- NODE_NAME_CASE(STRICT_VRNDSCALE)
- NODE_NAME_CASE(VRNDSCALE_SAE)
- NODE_NAME_CASE(VRNDSCALES)
- NODE_NAME_CASE(VRNDSCALES_SAE)
- NODE_NAME_CASE(VREDUCE)
- NODE_NAME_CASE(VREDUCE_SAE)
- NODE_NAME_CASE(VREDUCES)
- NODE_NAME_CASE(VREDUCES_SAE)
- NODE_NAME_CASE(VGETMANT)
- NODE_NAME_CASE(VGETMANT_SAE)
- NODE_NAME_CASE(VGETMANTS)
- NODE_NAME_CASE(VGETMANTS_SAE)
- NODE_NAME_CASE(PCMPESTR)
- NODE_NAME_CASE(PCMPISTR)
- NODE_NAME_CASE(XTEST)
- NODE_NAME_CASE(COMPRESS)
- NODE_NAME_CASE(EXPAND)
- NODE_NAME_CASE(SELECTS)
- NODE_NAME_CASE(ADDSUB)
- NODE_NAME_CASE(RCP14)
- NODE_NAME_CASE(RCP14S)
- NODE_NAME_CASE(RSQRT14)
- NODE_NAME_CASE(RSQRT14S)
- NODE_NAME_CASE(FADD_RND)
- NODE_NAME_CASE(FADDS)
- NODE_NAME_CASE(FADDS_RND)
- NODE_NAME_CASE(FSUB_RND)
- NODE_NAME_CASE(FSUBS)
- NODE_NAME_CASE(FSUBS_RND)
- NODE_NAME_CASE(FMUL_RND)
- NODE_NAME_CASE(FMULS)
- NODE_NAME_CASE(FMULS_RND)
- NODE_NAME_CASE(FDIV_RND)
- NODE_NAME_CASE(FDIVS)
- NODE_NAME_CASE(FDIVS_RND)
- NODE_NAME_CASE(FSQRT_RND)
- NODE_NAME_CASE(FSQRTS)
- NODE_NAME_CASE(FSQRTS_RND)
- NODE_NAME_CASE(FGETEXP)
- NODE_NAME_CASE(FGETEXP_SAE)
- NODE_NAME_CASE(FGETEXPS)
- NODE_NAME_CASE(FGETEXPS_SAE)
- NODE_NAME_CASE(SCALEF)
- NODE_NAME_CASE(SCALEF_RND)
- NODE_NAME_CASE(SCALEFS)
- NODE_NAME_CASE(SCALEFS_RND)
- NODE_NAME_CASE(MULHRS)
- NODE_NAME_CASE(SINT_TO_FP_RND)
- NODE_NAME_CASE(UINT_TO_FP_RND)
- NODE_NAME_CASE(CVTTP2SI)
- NODE_NAME_CASE(CVTTP2UI)
- NODE_NAME_CASE(STRICT_CVTTP2SI)
- NODE_NAME_CASE(STRICT_CVTTP2UI)
- NODE_NAME_CASE(MCVTTP2SI)
- NODE_NAME_CASE(MCVTTP2UI)
- NODE_NAME_CASE(CVTTP2SI_SAE)
- NODE_NAME_CASE(CVTTP2UI_SAE)
- NODE_NAME_CASE(CVTTS2SI)
- NODE_NAME_CASE(CVTTS2UI)
- NODE_NAME_CASE(CVTTS2SI_SAE)
- NODE_NAME_CASE(CVTTS2UI_SAE)
- NODE_NAME_CASE(CVTSI2P)
- NODE_NAME_CASE(CVTUI2P)
- NODE_NAME_CASE(STRICT_CVTSI2P)
- NODE_NAME_CASE(STRICT_CVTUI2P)
- NODE_NAME_CASE(MCVTSI2P)
- NODE_NAME_CASE(MCVTUI2P)
- NODE_NAME_CASE(VFPCLASS)
- NODE_NAME_CASE(VFPCLASSS)
- NODE_NAME_CASE(MULTISHIFT)
- NODE_NAME_CASE(SCALAR_SINT_TO_FP)
- NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
- NODE_NAME_CASE(SCALAR_UINT_TO_FP)
- NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
- NODE_NAME_CASE(CVTPS2PH)
- NODE_NAME_CASE(STRICT_CVTPS2PH)
- NODE_NAME_CASE(CVTPS2PH_SAE)
- NODE_NAME_CASE(MCVTPS2PH)
- NODE_NAME_CASE(MCVTPS2PH_SAE)
- NODE_NAME_CASE(CVTPH2PS)
- NODE_NAME_CASE(STRICT_CVTPH2PS)
- NODE_NAME_CASE(CVTPH2PS_SAE)
- NODE_NAME_CASE(CVTP2SI)
- NODE_NAME_CASE(CVTP2UI)
- NODE_NAME_CASE(MCVTP2SI)
- NODE_NAME_CASE(MCVTP2UI)
- NODE_NAME_CASE(CVTP2SI_RND)
- NODE_NAME_CASE(CVTP2UI_RND)
- NODE_NAME_CASE(CVTS2SI)
- NODE_NAME_CASE(CVTS2UI)
- NODE_NAME_CASE(CVTS2SI_RND)
- NODE_NAME_CASE(CVTS2UI_RND)
- NODE_NAME_CASE(CVTNEPS2BF16)
- NODE_NAME_CASE(MCVTNEPS2BF16)
- NODE_NAME_CASE(DPBF16PS)
- NODE_NAME_CASE(DPFP16PS)
- NODE_NAME_CASE(MPSADBW)
- NODE_NAME_CASE(LWPINS)
- NODE_NAME_CASE(MGATHER)
- NODE_NAME_CASE(MSCATTER)
- NODE_NAME_CASE(VPDPBUSD)
- NODE_NAME_CASE(VPDPBUSDS)
- NODE_NAME_CASE(VPDPWSSD)
- NODE_NAME_CASE(VPDPWSSDS)
- NODE_NAME_CASE(VPSHUFBITQMB)
- NODE_NAME_CASE(GF2P8MULB)
- NODE_NAME_CASE(GF2P8AFFINEQB)
- NODE_NAME_CASE(GF2P8AFFINEINVQB)
- NODE_NAME_CASE(NT_CALL)
- NODE_NAME_CASE(NT_BRIND)
- NODE_NAME_CASE(UMWAIT)
- NODE_NAME_CASE(TPAUSE)
- NODE_NAME_CASE(ENQCMD)
- NODE_NAME_CASE(ENQCMDS)
- NODE_NAME_CASE(VP2INTERSECT)
- NODE_NAME_CASE(VPDPBSUD)
- NODE_NAME_CASE(VPDPBSUDS)
- NODE_NAME_CASE(VPDPBUUD)
- NODE_NAME_CASE(VPDPBUUDS)
- NODE_NAME_CASE(VPDPBSSD)
- NODE_NAME_CASE(VPDPBSSDS)
- NODE_NAME_CASE(VPDPWSUD)
- NODE_NAME_CASE(VPDPWSUDS)
- NODE_NAME_CASE(VPDPWUSD)
- NODE_NAME_CASE(VPDPWUSDS)
- NODE_NAME_CASE(VPDPWUUD)
- NODE_NAME_CASE(VPDPWUUDS)
- NODE_NAME_CASE(VMINMAX)
- NODE_NAME_CASE(VMINMAX_SAE)
- NODE_NAME_CASE(VMINMAXS)
- NODE_NAME_CASE(VMINMAXS_SAE)
- NODE_NAME_CASE(CVTP2IBS)
- NODE_NAME_CASE(CVTP2IUBS)
- NODE_NAME_CASE(CVTP2IBS_RND)
- NODE_NAME_CASE(CVTP2IUBS_RND)
- NODE_NAME_CASE(CVTTP2IBS)
- NODE_NAME_CASE(CVTTP2IUBS)
- NODE_NAME_CASE(CVTTP2IBS_SAE)
- NODE_NAME_CASE(CVTTP2IUBS_SAE)
- NODE_NAME_CASE(VCVT2PH2BF8)
- NODE_NAME_CASE(VCVT2PH2BF8S)
- NODE_NAME_CASE(VCVT2PH2HF8)
- NODE_NAME_CASE(VCVT2PH2HF8S)
- NODE_NAME_CASE(VCVTBIASPH2BF8)
- NODE_NAME_CASE(VCVTBIASPH2BF8S)
- NODE_NAME_CASE(VCVTBIASPH2HF8)
- NODE_NAME_CASE(VCVTBIASPH2HF8S)
- NODE_NAME_CASE(VCVTPH2BF8)
- NODE_NAME_CASE(VCVTPH2BF8S)
- NODE_NAME_CASE(VCVTPH2HF8)
- NODE_NAME_CASE(VCVTPH2HF8S)
- NODE_NAME_CASE(VMCVTBIASPH2BF8)
- NODE_NAME_CASE(VMCVTBIASPH2BF8S)
- NODE_NAME_CASE(VMCVTBIASPH2HF8)
- NODE_NAME_CASE(VMCVTBIASPH2HF8S)
- NODE_NAME_CASE(VMCVTPH2BF8)
- NODE_NAME_CASE(VMCVTPH2BF8S)
- NODE_NAME_CASE(VMCVTPH2HF8)
- NODE_NAME_CASE(VMCVTPH2HF8S)
- NODE_NAME_CASE(VCVTHF82PH)
- NODE_NAME_CASE(AESENC128KL)
- NODE_NAME_CASE(AESDEC128KL)
- NODE_NAME_CASE(AESENC256KL)
- NODE_NAME_CASE(AESDEC256KL)
- NODE_NAME_CASE(AESENCWIDE128KL)
- NODE_NAME_CASE(AESDECWIDE128KL)
- NODE_NAME_CASE(AESENCWIDE256KL)
- NODE_NAME_CASE(AESDECWIDE256KL)
- NODE_NAME_CASE(CMPCCXADD)
- NODE_NAME_CASE(TESTUI)
- NODE_NAME_CASE(FP80_ADD)
- NODE_NAME_CASE(STRICT_FP80_ADD)
- NODE_NAME_CASE(CCMP)
- NODE_NAME_CASE(CTEST)
- NODE_NAME_CASE(CLOAD)
- NODE_NAME_CASE(CSTORE)
- NODE_NAME_CASE(CVTTS2SIS)
- NODE_NAME_CASE(CVTTS2UIS)
- NODE_NAME_CASE(CVTTS2SIS_SAE)
- NODE_NAME_CASE(CVTTS2UIS_SAE)
- NODE_NAME_CASE(CVTTP2SIS)
- NODE_NAME_CASE(MCVTTP2SIS)
- NODE_NAME_CASE(CVTTP2UIS_SAE)
- NODE_NAME_CASE(CVTTP2SIS_SAE)
- NODE_NAME_CASE(CVTTP2UIS)
- NODE_NAME_CASE(MCVTTP2UIS)
- NODE_NAME_CASE(POP_FROM_X87_REG)
- }
- return nullptr;
-#undef NODE_NAME_CASE
-}
-
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b7151f65942b4..4365bc0075fdc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
+#include "X86SelectionDAGInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -21,984 +22,6 @@ namespace llvm {
class X86Subtarget;
class X86TargetMachine;
- namespace X86ISD {
- // X86 Specific DAG Nodes
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- /// Bit scan forward.
- BSF,
- /// Bit scan reverse.
- BSR,
-
- /// X86 funnel/double shift i16 instructions. These correspond to
- /// X86::SHLDW and X86::SHRDW instructions which have different amt
- /// modulo rules to generic funnel shifts.
- /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
- FSHL,
- FSHR,
-
- /// Bitwise logical AND of floating point values. This corresponds
- /// to X86::ANDPS or X86::ANDPD.
- FAND,
-
- /// Bitwise logical OR of floating point values. This corresponds
- /// to X86::ORPS or X86::ORPD.
- FOR,
-
- /// Bitwise logical XOR of floating point values. This corresponds
- /// to X86::XORPS or X86::XORPD.
- FXOR,
-
- /// Bitwise logical ANDNOT of floating point values. This
- /// corresponds to X86::ANDNPS or X86::ANDNPD.
- FANDN,
-
- /// These operations represent an abstract X86 call
- /// instruction, which includes a bunch of information. In particular the
- /// operands of these node are:
- ///
- /// #0 - The incoming token chain
- /// #1 - The callee
- /// #2 - The number of arg bytes the caller pushes on the stack.
- /// #3 - The number of arg bytes the callee pops off the stack.
- /// #4 - The value to pass in AL/AX/EAX (optional)
- /// #5 - The value to pass in DL/DX/EDX (optional)
- ///
- /// The result values of these nodes are:
- ///
- /// #0 - The outgoing token chain
- /// #1 - The first register result value (optional)
- /// #2 - The second register result value (optional)
- ///
- CALL,
-
- /// Same as call except it adds the NoTrack prefix.
- NT_CALL,
-
- // Pseudo for a OBJC call that gets emitted together with a special
- // marker instruction.
- CALL_RVMARKER,
-
- /// The same as ISD::CopyFromReg except that this node makes it explicit
- /// that it may lower to an x87 FPU stack pop. Optimizations should be more
- /// cautious when handling this node than a normal CopyFromReg to avoid
- /// removing a required FPU stack pop. A key requirement is optimizations
- /// should not optimize any users of a chain that contains a
- /// POP_FROM_X87_REG to use a chain from a point earlier than the
- /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
- POP_FROM_X87_REG,
-
- // Pseudo for a call to an imported function to ensure the correct machine
- // instruction is emitted for Import Call Optimization.
- IMP_CALL,
-
- /// X86 compare and logical compare instructions.
- CMP,
- FCMP,
- COMI,
- UCOMI,
-
- // X86 compare with Intrinsics similar to COMI.
- COMX,
- UCOMX,
-
- /// X86 bit-test instructions.
- BT,
-
- /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
- /// operand, usually produced by a CMP instruction.
- SETCC,
-
- /// X86 Select
- SELECTS,
-
- // Same as SETCC except it's materialized with a sbb and the value is all
- // one's or all zero's.
- SETCC_CARRY, // R = carry_bit ? ~0 : 0
-
- /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
- /// Operands are two FP values to compare; result is a mask of
- /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
- FSETCC,
-
- /// X86 FP SETCC, similar to above, but with output as an i1 mask and
- /// and a version with SAE.
- FSETCCM,
- FSETCCM_SAE,
-
- /// X86 conditional moves. Operand 0 and operand 1 are the two values
- /// to select from. Operand 2 is the condition code, and operand 3 is the
- /// flag operand produced by a CMP or TEST instruction.
- CMOV,
-
- /// X86 conditional branches. Operand 0 is the chain operand, operand 1
- /// is the block to branch if condition is true, operand 2 is the
- /// condition code, and operand 3 is the flag operand produced by a CMP
- /// or TEST instruction.
- BRCOND,
-
- /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
- /// operand 1 is the target address.
- NT_BRIND,
-
- /// Return with a glue operand. Operand 0 is the chain operand, operand
- /// 1 is the number of bytes of stack to pop.
- RET_GLUE,
-
- /// Return from interrupt. Operand 0 is the number of bytes to pop.
- IRET,
-
- /// Repeat fill, corresponds to X86::REP_STOSx.
- REP_STOS,
-
- /// Repeat move, corresponds to X86::REP_MOVSx.
- REP_MOVS,
-
- /// On Darwin, this node represents the result of the popl
- /// at function entry, used for PIC code.
- GlobalBaseReg,
-
- /// A wrapper node for TargetConstantPool, TargetJumpTable,
- /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
- /// MCSymbol and TargetBlockAddress.
- Wrapper,
-
- /// Special wrapper used under X86-64 PIC mode for RIP
- /// relative displacements.
- WrapperRIP,
-
- /// Copies a 64-bit value from an MMX vector to the low word
- /// of an XMM vector, with the high word zero filled.
- MOVQ2DQ,
-
- /// Copies a 64-bit value from the low word of an XMM vector
- /// to an MMX vector.
- MOVDQ2Q,
-
- /// Copies a 32-bit value from the low word of a MMX
- /// vector to a GPR.
- MMX_MOVD2W,
-
- /// Copies a GPR into the low 32-bit word of a MMX vector
- /// and zero out the high word.
- MMX_MOVW2D,
-
- /// Extract an 8-bit value from a vector and zero extend it to
- /// i32, corresponds to X86::PEXTRB.
- PEXTRB,
-
- /// Extract a 16-bit value from a vector and zero extend it to
- /// i32, corresponds to X86::PEXTRW.
- PEXTRW,
-
- /// Insert any element of a 4 x float vector into any element
- /// of a destination 4 x floatvector.
- INSERTPS,
-
- /// Insert the lower 8-bits of a 32-bit value to a vector,
- /// corresponds to X86::PINSRB.
- PINSRB,
-
- /// Insert the lower 16-bits of a 32-bit value to a vector,
- /// corresponds to X86::PINSRW.
- PINSRW,
-
- /// Shuffle 16 8-bit values within a vector.
- PSHUFB,
-
- /// Compute Sum of Absolute Differences.
- PSADBW,
- /// Compute Double Block Packed Sum-Absolute-Differences
- DBPSADBW,
-
- /// Bitwise Logical AND NOT of Packed FP values.
- ANDNP,
-
- /// Blend where the selector is an immediate.
- BLENDI,
-
- /// Dynamic (non-constant condition) vector blend where only the sign bits
- /// of the condition elements are used. This is used to enforce that the
- /// condition mask is not valid for generic VSELECT optimizations. This
- /// is also used to implement the intrinsics.
- /// Operands are in VSELECT order: MASK, TRUE, FALSE
- BLENDV,
-
- /// Combined add and sub on an FP vector.
- ADDSUB,
-
- // FP vector ops with rounding mode.
- FADD_RND,
- FADDS,
- FADDS_RND,
- FSUB_RND,
- FSUBS,
- FSUBS_RND,
- FMUL_RND,
- FMULS,
- FMULS_RND,
- FDIV_RND,
- FDIVS,
- FDIVS_RND,
- FMAX_SAE,
- FMAXS_SAE,
- FMIN_SAE,
- FMINS_SAE,
- FSQRT_RND,
- FSQRTS,
- FSQRTS_RND,
-
- // FP vector get exponent.
- FGETEXP,
- FGETEXP_SAE,
- FGETEXPS,
- FGETEXPS_SAE,
- // Extract Normalized Mantissas.
- VGETMANT,
- VGETMANT_SAE,
- VGETMANTS,
- VGETMANTS_SAE,
- // FP Scale.
- SCALEF,
- SCALEF_RND,
- SCALEFS,
- SCALEFS_RND,
-
- /// Integer horizontal add/sub.
- HADD,
- HSUB,
-
- /// Floating point horizontal add/sub.
- FHADD,
- FHSUB,
-
- // Detect Conflicts Within a Vector
- CONFLICT,
-
- /// Floating point max and min.
- FMAX,
- FMIN,
-
- /// Commutative FMIN and FMAX.
- FMAXC,
- FMINC,
-
- /// Scalar intrinsic floating point max and min.
- FMAXS,
- FMINS,
-
- /// Floating point reciprocal-sqrt and reciprocal approximation.
- /// Note that these typically require refinement
- /// in order to obtain suitable precision.
- FRSQRT,
- FRCP,
-
- // AVX-512 reciprocal approximations with a little more precision.
- RSQRT14,
- RSQRT14S,
- RCP14,
- RCP14S,
-
- // Thread Local Storage.
- TLSADDR,
-
- // Thread Local Storage. A call to get the start address
- // of the TLS block for the current module.
- TLSBASEADDR,
-
- // Thread Local Storage. When calling to an OS provided
- // thunk at the address from an earlier relocation.
- TLSCALL,
-
- // Thread Local Storage. A descriptor containing pointer to
- // code and to argument to get the TLS offset for the symbol.
- TLSDESC,
-
- // Exception Handling helpers.
- EH_RETURN,
-
- // SjLj exception handling setjmp.
- EH_SJLJ_SETJMP,
-
- // SjLj exception handling longjmp.
- EH_SJLJ_LONGJMP,
-
- // SjLj exception handling dispatch.
- EH_SJLJ_SETUP_DISPATCH,
-
- /// Tail call return. See X86TargetLowering::LowerCall for
- /// the list of operands.
- TC_RETURN,
-
- // Vector move to low scalar and zero higher vector elements.
- VZEXT_MOVL,
-
- // Vector integer truncate.
- VTRUNC,
- // Vector integer truncate with unsigned/signed saturation.
- VTRUNCUS,
- VTRUNCS,
-
- // Masked version of the above. Used when less than a 128-bit result is
- // produced since the mask only applies to the lower elements and can't
- // be represented by a select.
- // SRC, PASSTHRU, MASK
- VMTRUNC,
- VMTRUNCUS,
- VMTRUNCS,
-
- // Vector FP extend.
- VFPEXT,
- VFPEXT_SAE,
- VFPEXTS,
- VFPEXTS_SAE,
-
- // Vector FP round.
- VFPROUND,
- // Convert TWO packed single data to one packed data
- VFPROUND2,
- VFPROUND2_RND,
- VFPROUND_RND,
- VFPROUNDS,
- VFPROUNDS_RND,
-
- // Masked version of above. Used for v2f64->v4f32.
- // SRC, PASSTHRU, MASK
- VMFPROUND,
-
- // 128-bit vector logical left / right shift
- VSHLDQ,
- VSRLDQ,
-
- // Vector shift elements
- VSHL,
- VSRL,
- VSRA,
-
- // Vector variable shift
- VSHLV,
- VSRLV,
- VSRAV,
-
- // Vector shift elements by immediate
- VSHLI,
- VSRLI,
- VSRAI,
-
- // Shifts of mask registers.
- KSHIFTL,
- KSHIFTR,
-
- // Bit rotate by immediate
- VROTLI,
- VROTRI,
-
- // Vector packed double/float comparison.
- CMPP,
-
- // Vector integer comparisons.
- PCMPEQ,
- PCMPGT,
-
- // v8i16 Horizontal minimum and position.
- PHMINPOS,
-
- MULTISHIFT,
-
- /// Vector comparison generating mask bits for fp and
- /// integer signed and unsigned data types.
- CMPM,
- // Vector mask comparison generating mask bits for FP values.
- CMPMM,
- // Vector mask comparison with SAE for FP values.
- CMPMM_SAE,
-
- // Arithmetic operations with FLAGS results.
- ADD,
- SUB,
- ADC,
- SBB,
- SMUL,
- UMUL,
- OR,
- XOR,
- AND,
-
- // Bit field extract.
- BEXTR,
- BEXTRI,
-
- // Zero High Bits Starting with Specified Bit Position.
- BZHI,
-
- // Parallel extract and deposit.
- PDEP,
- PEXT,
-
- // X86-specific multiply by immediate.
- MUL_IMM,
-
- // Vector sign bit extraction.
- MOVMSK,
-
- // Vector bitwise comparisons.
- PTEST,
-
- // Vector packed fp sign bitwise comparisons.
- TESTP,
-
- // OR/AND test for masks.
- KORTEST,
- KTEST,
-
- // ADD for masks.
- KADD,
-
- // Several flavors of instructions with vector shuffle behaviors.
- // Saturated signed/unnsigned packing.
- PACKSS,
- PACKUS,
- // Intra-lane alignr.
- PALIGNR,
- // AVX512 inter-lane alignr.
- VALIGN,
- PSHUFD,
- PSHUFHW,
- PSHUFLW,
- SHUFP,
- // VBMI2 Concat & Shift.
- VSHLD,
- VSHRD,
-
- // Shuffle Packed Values at 128-bit granularity.
- SHUF128,
- MOVDDUP,
- MOVSHDUP,
- MOVSLDUP,
- MOVLHPS,
- MOVHLPS,
- MOVSD,
- MOVSS,
- MOVSH,
- UNPCKL,
- UNPCKH,
- VPERMILPV,
- VPERMILPI,
- VPERMI,
- VPERM2X128,
-
- // Variable Permute (VPERM).
- // Res = VPERMV MaskV, V0
- VPERMV,
-
- // 3-op Variable Permute (VPERMT2).
- // Res = VPERMV3 V0, MaskV, V1
- VPERMV3,
-
- // Bitwise ternary logic.
- VPTERNLOG,
- // Fix Up Special Packed Float32/64 values.
- VFIXUPIMM,
- VFIXUPIMM_SAE,
- VFIXUPIMMS,
- VFIXUPIMMS_SAE,
- // Range Restriction Calculation For Packed Pairs of Float32/64 values.
- VRANGE,
- VRANGE_SAE,
- VRANGES,
- VRANGES_SAE,
- // Reduce - Perform Reduction Transformation on scalar\packed FP.
- VREDUCE,
- VREDUCE_SAE,
- VREDUCES,
- VREDUCES_SAE,
- // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
- // Also used by the legacy (V)ROUND intrinsics where we mask out the
- // scaling part of the immediate.
- VRNDSCALE,
- VRNDSCALE_SAE,
- VRNDSCALES,
- VRNDSCALES_SAE,
- // Tests Types Of a FP Values for packed types.
- VFPCLASS,
- // Tests Types Of a FP Values for scalar types.
- VFPCLASSS,
-
- // Broadcast (splat) scalar or element 0 of a vector. If the operand is
- // a vector, this node may change the vector length as part of the splat.
- VBROADCAST,
- // Broadcast mask to vector.
- VBROADCASTM,
-
- /// SSE4A Extraction and Insertion.
- EXTRQI,
- INSERTQI,
-
- // XOP arithmetic/logical shifts.
- VPSHA,
- VPSHL,
- // XOP signed/unsigned integer comparisons.
- VPCOM,
- VPCOMU,
- // XOP packed permute bytes.
- VPPERM,
- // XOP two source permutation.
- VPERMIL2,
-
- // Vector multiply packed unsigned doubleword integers.
- PMULUDQ,
- // Vector multiply packed signed doubleword integers.
- PMULDQ,
- // Vector Multiply Packed UnsignedIntegers with Round and Scale.
- MULHRS,
-
- // Multiply and Add Packed Integers.
- VPMADDUBSW,
- VPMADDWD,
-
- // AVX512IFMA multiply and add.
- // NOTE: These are different than the instruction and perform
- // op0 x op1 + op2.
- VPMADD52L,
- VPMADD52H,
-
- // VNNI
- VPDPBUSD,
- VPDPBUSDS,
- VPDPWSSD,
- VPDPWSSDS,
-
- // FMA nodes.
- // We use the target independent ISD::FMA for the non-inverted case.
- FNMADD,
- FMSUB,
- FNMSUB,
- FMADDSUB,
- FMSUBADD,
-
- // FMA with rounding mode.
- FMADD_RND,
- FNMADD_RND,
- FMSUB_RND,
- FNMSUB_RND,
- FMADDSUB_RND,
- FMSUBADD_RND,
-
- // AVX512-FP16 complex addition and multiplication.
- VFMADDC,
- VFMADDC_RND,
- VFCMADDC,
- VFCMADDC_RND,
-
- VFMULC,
- VFMULC_RND,
- VFCMULC,
- VFCMULC_RND,
-
- VFMADDCSH,
- VFMADDCSH_RND,
- VFCMADDCSH,
- VFCMADDCSH_RND,
-
- VFMULCSH,
- VFMULCSH_RND,
- VFCMULCSH,
- VFCMULCSH_RND,
-
- VPDPBSUD,
- VPDPBSUDS,
- VPDPBUUD,
- VPDPBUUDS,
- VPDPBSSD,
- VPDPBSSDS,
-
- VPDPWSUD,
- VPDPWSUDS,
- VPDPWUSD,
- VPDPWUSDS,
- VPDPWUUD,
- VPDPWUUDS,
-
- VMINMAX,
- VMINMAX_SAE,
- VMINMAXS,
- VMINMAXS_SAE,
-
- CVTP2IBS,
- CVTP2IUBS,
- CVTP2IBS_RND,
- CVTP2IUBS_RND,
- CVTTP2IBS,
- CVTTP2IUBS,
- CVTTP2IBS_SAE,
- CVTTP2IUBS_SAE,
-
- MPSADBW,
-
- VCVT2PH2BF8,
- VCVT2PH2BF8S,
- VCVT2PH2HF8,
- VCVT2PH2HF8S,
- VCVTBIASPH2BF8,
- VCVTBIASPH2BF8S,
- VCVTBIASPH2HF8,
- VCVTBIASPH2HF8S,
- VCVTPH2BF8,
- VCVTPH2BF8S,
- VCVTPH2HF8,
- VCVTPH2HF8S,
- VMCVTBIASPH2BF8,
- VMCVTBIASPH2BF8S,
- VMCVTBIASPH2HF8,
- VMCVTBIASPH2HF8S,
- VMCVTPH2BF8,
- VMCVTPH2BF8S,
- VMCVTPH2HF8,
- VMCVTPH2HF8S,
- VCVTHF82PH,
-
- // Compress and expand.
- COMPRESS,
- EXPAND,
-
- // Bits shuffle
- VPSHUFBITQMB,
-
- // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
- SINT_TO_FP_RND,
- UINT_TO_FP_RND,
- SCALAR_SINT_TO_FP,
- SCALAR_UINT_TO_FP,
- SCALAR_SINT_TO_FP_RND,
- SCALAR_UINT_TO_FP_RND,
-
- // Vector float/double to signed/unsigned integer.
- CVTP2SI,
- CVTP2UI,
- CVTP2SI_RND,
- CVTP2UI_RND,
- // Scalar float/double to signed/unsigned integer.
- CVTS2SI,
- CVTS2UI,
- CVTS2SI_RND,
- CVTS2UI_RND,
-
- // Vector float/double to signed/unsigned integer with truncation.
- CVTTP2SI,
- CVTTP2UI,
- CVTTP2SI_SAE,
- CVTTP2UI_SAE,
-
- // Saturation enabled Vector float/double to signed/unsigned
- // integer with truncation.
- CVTTP2SIS,
- CVTTP2UIS,
- CVTTP2SIS_SAE,
- CVTTP2UIS_SAE,
- // Masked versions of above. Used for v2f64 to v4i32.
- // SRC, PASSTHRU, MASK
- MCVTTP2SIS,
- MCVTTP2UIS,
-
- // Scalar float/double to signed/unsigned integer with truncation.
- CVTTS2SI,
- CVTTS2UI,
- CVTTS2SI_SAE,
- CVTTS2UI_SAE,
-
- // Vector signed/unsigned integer to float/double.
- CVTSI2P,
- CVTUI2P,
-
- // Scalar float/double to signed/unsigned integer with saturation.
- CVTTS2SIS,
- CVTTS2UIS,
- CVTTS2SIS_SAE,
- CVTTS2UIS_SAE,
-
- // Masked versions of above. Used for v2f64->v4f32.
- // SRC, PASSTHRU, MASK
- MCVTP2SI,
- MCVTP2UI,
- MCVTTP2SI,
- MCVTTP2UI,
- MCVTSI2P,
- MCVTUI2P,
-
- // Custom handling for FP_TO_xINT_SAT
- FP_TO_SINT_SAT,
- FP_TO_UINT_SAT,
-
- // Vector float to bfloat16.
- // Convert packed single data to packed BF16 data
- CVTNEPS2BF16,
- // Masked version of above.
- // SRC, PASSTHRU, MASK
- MCVTNEPS2BF16,
-
- // Dot product of BF16/FP16 pairs to accumulated into
- // packed single precision.
- DPBF16PS,
- DPFP16PS,
-
- // A stack checking function call. On Windows it's _chkstk call.
- DYN_ALLOCA,
-
- // For allocating variable amounts of stack space when using
- // segmented stacks. Check if the current stacklet has enough space, and
- // falls back to heap allocation if not.
- SEG_ALLOCA,
-
- // For allocating stack space when using stack clash protector.
- // Allocation is performed by block, and each block is probed.
- PROBED_ALLOCA,
-
- // Memory barriers.
- MFENCE,
-
- // Get a random integer and indicate whether it is valid in CF.
- RDRAND,
-
- // Get a NIST SP800-90B & C compliant random integer and
- // indicate whether it is valid in CF.
- RDSEED,
-
- // Protection keys
- // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
- // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
- // value for ECX.
- RDPKRU,
- WRPKRU,
-
- // SSE42 string comparisons.
- // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
- // will emit one or two instructions based on which results are used. If
- // flags and index/mask this allows us to use a single instruction since
- // we won't have to pick and opcode for flags. Instead we can rely on the
- // DAG to CSE everything and decide at isel.
- PCMPISTR,
- PCMPESTR,
-
- // Test if in transactional execution.
- XTEST,
-
- // Conversions between float and half-float.
- CVTPS2PH,
- CVTPS2PH_SAE,
- CVTPH2PS,
- CVTPH2PS_SAE,
-
- // Masked version of above.
- // SRC, RND, PASSTHRU, MASK
- MCVTPS2PH,
- MCVTPS2PH_SAE,
-
- // Galois Field Arithmetic Instructions
- GF2P8AFFINEINVQB,
- GF2P8AFFINEQB,
- GF2P8MULB,
-
- // LWP insert record.
- LWPINS,
-
- // User level wait
- UMWAIT,
- TPAUSE,
-
- // Enqueue Stores Instructions
- ENQCMD,
- ENQCMDS,
-
- // For avx512-vp2intersect
- VP2INTERSECT,
-
- // User level interrupts - testui
- TESTUI,
-
- // Perform an FP80 add after changing precision control in FPCW.
- FP80_ADD,
-
- // Conditional compare instructions
- CCMP,
- CTEST,
-
- /// X86 strict FP compare instructions.
- FIRST_STRICTFP_OPCODE,
- STRICT_FCMP = FIRST_STRICTFP_OPCODE,
- STRICT_FCMPS,
-
- // Vector packed double/float comparison.
- STRICT_CMPP,
-
- /// Vector comparison generating mask bits for fp and
- /// integer signed and unsigned data types.
- STRICT_CMPM,
-
- // Vector float/double to signed/unsigned integer with truncation.
- STRICT_CVTTP2SI,
- STRICT_CVTTP2UI,
-
- // Vector FP extend.
- STRICT_VFPEXT,
-
- // Vector FP round.
- STRICT_VFPROUND,
-
- // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
- // Also used by the legacy (V)ROUND intrinsics where we mask out the
- // scaling part of the immediate.
- STRICT_VRNDSCALE,
-
- // Vector signed/unsigned integer to float/double.
- STRICT_CVTSI2P,
- STRICT_CVTUI2P,
-
- // Strict FMA nodes.
- STRICT_FNMADD,
- STRICT_FMSUB,
- STRICT_FNMSUB,
-
- // Conversions between float and half-float.
- STRICT_CVTPS2PH,
- STRICT_CVTPH2PS,
-
- // Perform an FP80 add after changing precision control in FPCW.
- STRICT_FP80_ADD,
-
- /// Floating point max and min.
- STRICT_FMAX,
- STRICT_FMIN,
- LAST_STRICTFP_OPCODE = STRICT_FMIN,
-
- // Compare and swap.
- FIRST_MEMORY_OPCODE,
- LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
- LCMPXCHG8_DAG,
- LCMPXCHG16_DAG,
- LCMPXCHG16_SAVE_RBX_DAG,
-
- /// LOCK-prefixed arithmetic read-modify-write instructions.
- /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
- LADD,
- LSUB,
- LOR,
- LXOR,
- LAND,
- LBTS,
- LBTC,
- LBTR,
- LBTS_RM,
- LBTC_RM,
- LBTR_RM,
-
- /// RAO arithmetic instructions.
- /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
- AADD,
- AOR,
- AXOR,
- AAND,
-
- // Load, scalar_to_vector, and zero extend.
- VZEXT_LOAD,
-
- // extract_vector_elt, store.
- VEXTRACT_STORE,
-
- // scalar broadcast from memory.
- VBROADCAST_LOAD,
-
- // subvector broadcast from memory.
- SUBV_BROADCAST_LOAD,
-
- // Store FP control word into i16 memory.
- FNSTCW16m,
-
- // Load FP control word from i16 memory.
- FLDCW16m,
-
- // Store x87 FPU environment into memory.
- FNSTENVm,
-
- // Load x87 FPU environment from memory.
- FLDENVm,
-
- /// This instruction implements FP_TO_SINT with the
- /// integer destination in memory and a FP reg source. This corresponds
- /// to the X86::FIST*m instructions and the rounding mode change stuff. It
- /// has two inputs (token chain and address) and two outputs (int value
- /// and token chain). Memory VT specifies the type to store to.
- FP_TO_INT_IN_MEM,
-
- /// This instruction implements SINT_TO_FP with the
- /// integer source in memory and FP reg result. This corresponds to the
- /// X86::FILD*m instructions. It has two inputs (token chain and address)
- /// and two outputs (FP value and token chain). The integer source type is
- /// specified by the memory VT.
- FILD,
-
- /// This instruction implements a fp->int store from FP stack
- /// slots. This corresponds to the fist instruction. It takes a
- /// chain operand, value to store, address, and glue. The memory VT
- /// specifies the type to store as.
- FIST,
-
- /// This instruction implements an extending load to FP stack slots.
- /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
- /// operand, and ptr to load from. The memory VT specifies the type to
- /// load from.
- FLD,
-
- /// This instruction implements a truncating store from FP stack
- /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
- /// chain operand, value to store, address, and glue. The memory VT
- /// specifies the type to store as.
- FST,
-
- /// These instructions grab the address of the next argument
- /// from a va_list. (reads and modifies the va_list in memory)
- VAARG_64,
- VAARG_X32,
-
- // Vector truncating store with unsigned/signed saturation
- VTRUNCSTOREUS,
- VTRUNCSTORES,
- // Vector truncating masked store with unsigned/signed saturation
- VMTRUNCSTOREUS,
- VMTRUNCSTORES,
-
- // X86 specific gather and scatter
- MGATHER,
- MSCATTER,
-
- // Key locker nodes that produce flags.
- AESENC128KL,
- AESDEC128KL,
- AESENC256KL,
- AESDEC256KL,
- AESENCWIDE128KL,
- AESDECWIDE128KL,
- AESENCWIDE256KL,
- AESDECWIDE256KL,
-
- /// Compare and Add if Condition is Met. Compare value in operand 2 with
- /// value in memory of operand 1. If condition of operand 4 is met, add
- /// value operand 3 to m32 and write new value in operand 1. Operand 2 is
- /// always updated with the original value from operand 1.
- CMPCCXADD,
-
- // Save xmm argument registers to the stack, according to %al. An operator
- // is needed so that this can be expanded with control flow.
- VASTART_SAVE_XMM_REGS,
-
- // Conditional load/store instructions
- CLOAD,
- CSTORE,
- LAST_MEMORY_OPCODE = CSTORE,
- };
- } // end namespace X86ISD
-
namespace X86 {
/// Current rounding mode is represented in bits 11:10 of FPSR. These
/// values are same as corresponding constants for rounding mode used
@@ -1186,9 +209,6 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
- /// This method returns the name of a target specific DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
/// Do not merge vector stores after legalization because that may conflict
/// with x86-specific store splitting optimizations.
bool mergeStoresAfterLegalization(EVT MemVT) const override {
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index a61bbe56d9c26..4e3119f542c4c 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -942,10 +942,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (Glue.getNode())
RetOps.push_back(Glue);
- X86ISD::NodeType opcode = X86ISD::RET_GLUE;
+ unsigned RetOpcode = X86ISD::RET_GLUE;
if (CallConv == CallingConv::X86_INTR)
- opcode = X86ISD::IRET;
- return DAG.getNode(opcode, dl, MVT::Other, RetOps);
+ RetOpcode = X86ISD::IRET;
+ return DAG.getNode(RetOpcode, dl, MVT::Other, RetOps);
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index 116986a0fffea..fcf791b1a1f0f 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -141,8 +141,12 @@ def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>;
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>;
-def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
-def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
+
+let IsStrictFP = true in {
+ def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
+ def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
+}
+
def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>;
@@ -790,8 +794,11 @@ def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
+
+let IsStrictFP = true in
def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
[SDNPHasChain,SDNPCommutative]>;
+
def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs),
[(X86strict_fp80_add node:$lhs, node:$rhs),
(X86fp80_add node:$lhs, node:$rhs)]>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5321ecf0c1b2c..35c4d89a1b231 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -46,10 +46,12 @@ def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
-def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp,
- [SDNPHasChain]>;
-def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp,
- [SDNPHasChain]>;
+let IsStrictFP = true in {
+ def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp,
+ [SDNPHasChain]>;
+ def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp,
+ [SDNPHasChain]>;
+}
def X86any_fmin : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_fmin node:$src1, node:$src2),
@@ -146,6 +148,7 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>]>>;
+let IsStrictFP = true in
def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>]>,
@@ -165,6 +168,7 @@ def X86vfpround2 : SDNode<"X86ISD::VFPROUND2",
SDTCisSameAs<1, 2>,
SDTCisOpSmallerThanOp<0, 1>]>>;
+let IsStrictFP = true in
def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>,
@@ -215,7 +219,10 @@ def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
+
+let IsStrictFP = true in
def X86strict_cmpp : SDNode<"X86ISD::STRICT_CMPP", SDTX86VFCMP, [SDNPHasChain]>;
+
def X86any_cmpp : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_cmpp node:$src1, node:$src2, node:$src3),
(X86cmpp node:$src1, node:$src2, node:$src3)]>;
@@ -235,7 +242,10 @@ def X86CmpMaskCCScalar :
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
def X86cmpmm : SDNode<"X86ISD::CMPMM", X86MaskCmpMaskCC>;
+
+let IsStrictFP = true in
def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>;
+
def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_cmpm node:$src1, node:$src2, node:$src3),
(X86cmpm node:$src1, node:$src2, node:$src3)]>;
@@ -494,8 +504,11 @@ def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>;
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
+
+let IsStrictFP = true in
def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm,
[SDNPHasChain]>;
+
def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_VRndScale node:$src1, node:$src2),
(X86VRndScale node:$src1, node:$src2)]>;
@@ -554,17 +567,26 @@ def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
+
+let IsStrictFP = true in
def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+
def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fnmadd node:$src1, node:$src2, node:$src3),
(X86Fnmadd node:$src1, node:$src2, node:$src3)]>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+
+let IsStrictFP = true in
def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+
def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fmsub node:$src1, node:$src2, node:$src3),
(X86Fmsub node:$src1, node:$src2, node:$src3)]>;
def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+
+let IsStrictFP = true in
def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+
def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fnmsub node:$src1, node:$src2, node:$src3),
(X86Fnmsub node:$src1, node:$src2, node:$src3)]>;
@@ -709,8 +731,12 @@ def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
// cvtt fp-to-int staff
def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>;
def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>;
-def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>;
-def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>;
+
+let IsStrictFP = true in {
+ def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>;
+ def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>;
+}
+
def X86any_cvttp2si : PatFrags<(ops node:$src),
[(X86strict_cvttp2si node:$src),
(X86cvttp2si node:$src)]>;
@@ -720,8 +746,12 @@ def X86any_cvttp2ui : PatFrags<(ops node:$src),
def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>;
def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>;
-def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>;
-def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>;
+
+let IsStrictFP = true in {
+ def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>;
+ def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>;
+}
+
def X86any_VSintToFP : PatFrags<(ops node:$src),
[(X86strict_VSintToFP node:$src),
(X86VSintToFP node:$src)]>;
@@ -761,8 +791,11 @@ def X86mcvttp2uis : SDNode<"X86ISD::MCVTTP2UIS", SDTMFloatToInt>;
def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>]>;
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>;
+
+let IsStrictFP = true in
def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps,
[SDNPHasChain]>;
+
def X86any_cvtph2ps : PatFrags<(ops node:$src),
[(X86strict_cvtph2ps node:$src),
(X86cvtph2ps node:$src)]>;
@@ -773,8 +806,11 @@ def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>]>;
def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>;
+
+let IsStrictFP = true in
def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph,
[SDNPHasChain]>;
+
def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_cvtps2ph node:$src1, node:$src2),
(X86cvtps2ph node:$src1, node:$src2)]>;
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index aba62c36546f9..11e45616653ac 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "X86SelectionDAGInfo.h"
-#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
@@ -19,6 +18,9 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
+#define GET_SDNODE_DESC
+#include "X86GenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "x86-selectiondag-info"
@@ -27,14 +29,77 @@ static cl::opt<bool>
UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
cl::desc("Use fast short rep mov in memcpy lowering"));
+X86SelectionDAGInfo::X86SelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(X86GenSDNodeInfo) {}
+
+const char *X86SelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define NODE_NAME_CASE(NODE) \
+ case X86ISD::NODE: \
+ return "X86ISD::" #NODE;
+
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<X86ISD::NodeType>(Opcode)) {
+ NODE_NAME_CASE(POP_FROM_X87_REG)
+ NODE_NAME_CASE(GlobalBaseReg)
+ NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
+ NODE_NAME_CASE(PCMPESTR)
+ NODE_NAME_CASE(PCMPISTR)
+ NODE_NAME_CASE(MGATHER)
+ NODE_NAME_CASE(MSCATTER)
+ NODE_NAME_CASE(AESENCWIDE128KL)
+ NODE_NAME_CASE(AESDECWIDE128KL)
+ NODE_NAME_CASE(AESENCWIDE256KL)
+ NODE_NAME_CASE(AESDECWIDE256KL)
+ }
+#undef NODE_NAME_CASE
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
- Opcode <= X86ISD::LAST_MEMORY_OPCODE;
+ // These nodes don't have corresponding entries in *.td files yet.
+ if (Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
+ Opcode <= X86ISD::LAST_MEMORY_OPCODE)
+ return true;
+
+ return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
}
-bool X86SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
- return Opcode >= X86ISD::FIRST_STRICTFP_OPCODE &&
- Opcode <= X86ISD::LAST_STRICTFP_OPCODE;
+void X86SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case X86ISD::VP2INTERSECT:
+ // invalid number of results; expected 1, got 2
+ case X86ISD::VTRUNCSTOREUS:
+ case X86ISD::VTRUNCSTORES:
+ case X86ISD::FSETCCM_SAE:
+ // invalid number of operands; expected 3, got 4
+ case X86ISD::CVTPH2PS:
+ case X86ISD::CVTTP2SI_SAE:
+ case X86ISD::CVTTP2UI_SAE:
+ case X86ISD::CVTTP2IBS_SAE:
+ // invalid number of operands; expected 1, got 2
+ case X86ISD::CMPMM_SAE:
+ // invalid number of operands; expected 4, got 5
+ case X86ISD::CMPMM:
+ case X86ISD::FSETCCM:
+ // operand #2 must have type i8, but has type i32
+ case X86ISD::CALL:
+ case X86ISD::NT_BRIND:
+ // operand #1 must have type i32 (iPTR), but has type i64
+ case X86ISD::ADD:
+ // result #1 must have type i32, but has type i128
+ case X86ISD::INSERTQI:
+ case X86ISD::EXTRQI:
+ // result #0 must have type v2i64, but has type v16i8/v8i16
+ case X86ISD::CMPCCXADD:
+ // operand #4 must have type i8, but has type i32
+ return;
+ }
+
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
/// Returns the best type to use with repmovs/repstos depending on alignment.
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
index e77e16bab830d..19c5986982614 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -15,20 +15,68 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "X86GenSDNodeInfo.inc"
+
namespace llvm {
+namespace X86ISD {
+
+enum NodeType : unsigned {
+ /// The same as ISD::CopyFromReg except that this node makes it explicit
+ /// that it may lower to an x87 FPU stack pop. Optimizations should be more
+ /// cautious when handling this node than a normal CopyFromReg to avoid
+ /// removing a required FPU stack pop. A key requirement is optimizations
+ /// should not optimize any users of a chain that contains a
+ /// POP_FROM_X87_REG to use a chain from a point earlier than the
+ /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
+ POP_FROM_X87_REG = X86ISD::GENERATED_OPCODE_END,
+
+ /// On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ // SSE42 string comparisons.
+ // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
+ // will emit one or two instructions based on which results are used. If
+ // flags and index/mask this allows us to use a single instruction since
+ // we won't have to pick and opcode for flags. Instead we can rely on the
+ // DAG to CSE everything and decide at isel.
+ PCMPISTR,
+ PCMPESTR,
+
+ // Compare and swap.
+ FIRST_MEMORY_OPCODE,
+ LCMPXCHG16_SAVE_RBX_DAG = FIRST_MEMORY_OPCODE,
-class X86SelectionDAGInfo : public SelectionDAGTargetInfo {
+ // X86 specific gather and scatter
+ MGATHER,
+ MSCATTER,
+
+ // Key locker nodes that produce flags.
+ AESENCWIDE128KL,
+ AESDECWIDE128KL,
+ AESENCWIDE256KL,
+ AESDECWIDE256KL,
+ LAST_MEMORY_OPCODE = AESDECWIDE256KL,
+};
+
+} // namespace X86ISD
+
+class X86SelectionDAGInfo : public SelectionDAGGenTargetInfo {
/// Returns true if it is possible for the base register to conflict with the
/// given set of clobbers for a memory intrinsic.
bool isBaseRegConflictPossible(SelectionDAG &DAG,
ArrayRef<MCPhysReg> ClobberSet) const;
public:
- explicit X86SelectionDAGInfo() = default;
+ X86SelectionDAGInfo();
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
bool isTargetMemoryOpcode(unsigned Opcode) const override;
- bool isTargetStrictFPOpcode(unsigned Opcode) const override;
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
@@ -44,6 +92,6 @@ class X86SelectionDAGInfo : public SelectionDAGTargetInfo {
MachinePointerInfo SrcPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
>From f42508154c241cbb6509de5cdb29b26b6394fa6f Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Thu, 12 Dec 2024 01:59:14 +0300
Subject: [PATCH 9/9] Generic bugs
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +--
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 +-
3 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c2b4c19846316..cd0137532aaa5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1955,7 +1955,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain,
const SDLoc &DL) {
EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout());
- return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain,
+ return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Other, Chain,
getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true));
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4f13f3b128ea4..42f308f77f76a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3186,8 +3186,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
Guard, GuardVal, ISD::SETNE);
// If the guard/stackslot do not equal, branch to failure MBB.
- SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, GuardVal.getOperand(0),
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
SDValue Br = DAG.getNode(ISD::BR, dl,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 5bed32db528d6..7fc14d592d21e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2717,7 +2717,7 @@ getSimpleVT(const unsigned char *MatcherTable, unsigned &MatcherIndex) {
void SelectionDAGISel::Select_JUMP_TABLE_DEBUG_INFO(SDNode *N) {
SDLoc dl(N);
- CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Glue,
+ CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Other,
CurDAG->getTargetConstant(N->getConstantOperandVal(1),
dl, MVT::i64, true));
}
More information about the llvm-commits
mailing list