[llvm] [X86] TableGen-erate SDNode descriptions (PR #168421)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 10:34:56 PST 2025
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/168421
>From 47c4de66e16373b57971e700374e83b8ff97b799 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:02:48 +0300
Subject: [PATCH 1/3] X86
---
llvm/lib/Target/X86/CMakeLists.txt | 1 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 478 +--------
llvm/lib/Target/X86/X86ISelLowering.h | 982 +------------------
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 6 +-
llvm/lib/Target/X86/X86InstrFragments.td | 11 +-
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 52 +-
llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 77 +-
llvm/lib/Target/X86/X86SelectionDAGInfo.h | 56 +-
8 files changed, 185 insertions(+), 1478 deletions(-)
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index f9bd233cf8ecf..407772197b24a 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -17,6 +17,7 @@ tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info
tablegen(LLVM X86GenMnemonicTables.inc -gen-x86-mnemonic-tables -asmwriternum=1)
tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank)
tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM X86GenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables -asmwriternum=1)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 621f1868d3311..7db2cda6eba46 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19499,9 +19499,9 @@ static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
}
if (!Ret) {
- X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC
- : LocalDynamic ? X86ISD::TLSBASEADDR
- : X86ISD::TLSADDR;
+ unsigned CallType = UseTLSDESC ? X86ISD::TLSDESC
+ : LocalDynamic ? X86ISD::TLSBASEADDR
+ : X86ISD::TLSADDR;
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
if (LoadGlobalBaseReg) {
@@ -29386,7 +29386,7 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
APInt PreferredZero = APInt::getZero(SizeInBits);
APInt OppositeZero = PreferredZero;
EVT IVT = VT.changeTypeToInteger();
- X86ISD::NodeType MinMaxOp;
+ unsigned MinMaxOp;
if (IsMaxOp) {
MinMaxOp = X86ISD::FMAX;
OppositeZero.setSignBit();
@@ -34914,476 +34914,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((X86ISD::NodeType)Opcode) {
- case X86ISD::FIRST_NUMBER: break;
-#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
- NODE_NAME_CASE(BSF)
- NODE_NAME_CASE(BSR)
- NODE_NAME_CASE(FSHL)
- NODE_NAME_CASE(FSHR)
- NODE_NAME_CASE(FAND)
- NODE_NAME_CASE(FANDN)
- NODE_NAME_CASE(FOR)
- NODE_NAME_CASE(FXOR)
- NODE_NAME_CASE(FILD)
- NODE_NAME_CASE(FIST)
- NODE_NAME_CASE(FP_TO_INT_IN_MEM)
- NODE_NAME_CASE(FLD)
- NODE_NAME_CASE(FST)
- NODE_NAME_CASE(CALL)
- NODE_NAME_CASE(CALL_RVMARKER)
- NODE_NAME_CASE(IMP_CALL)
- NODE_NAME_CASE(BT)
- NODE_NAME_CASE(CMP)
- NODE_NAME_CASE(FCMP)
- NODE_NAME_CASE(STRICT_FCMP)
- NODE_NAME_CASE(STRICT_FCMPS)
- NODE_NAME_CASE(COMI)
- NODE_NAME_CASE(UCOMI)
- NODE_NAME_CASE(COMX)
- NODE_NAME_CASE(UCOMX)
- NODE_NAME_CASE(CMPM)
- NODE_NAME_CASE(CMPMM)
- NODE_NAME_CASE(STRICT_CMPM)
- NODE_NAME_CASE(CMPMM_SAE)
- NODE_NAME_CASE(SETCC)
- NODE_NAME_CASE(SETCC_CARRY)
- NODE_NAME_CASE(FSETCC)
- NODE_NAME_CASE(FSETCCM)
- NODE_NAME_CASE(FSETCCM_SAE)
- NODE_NAME_CASE(CMOV)
- NODE_NAME_CASE(BRCOND)
- NODE_NAME_CASE(RET_GLUE)
- NODE_NAME_CASE(IRET)
- NODE_NAME_CASE(REP_STOS)
- NODE_NAME_CASE(REP_MOVS)
- NODE_NAME_CASE(GlobalBaseReg)
- NODE_NAME_CASE(Wrapper)
- NODE_NAME_CASE(WrapperRIP)
- NODE_NAME_CASE(MOVQ2DQ)
- NODE_NAME_CASE(MOVDQ2Q)
- NODE_NAME_CASE(MMX_MOVD2W)
- NODE_NAME_CASE(MMX_MOVW2D)
- NODE_NAME_CASE(PEXTRB)
- NODE_NAME_CASE(PEXTRW)
- NODE_NAME_CASE(INSERTPS)
- NODE_NAME_CASE(PINSRB)
- NODE_NAME_CASE(PINSRW)
- NODE_NAME_CASE(PSHUFB)
- NODE_NAME_CASE(ANDNP)
- NODE_NAME_CASE(BLENDI)
- NODE_NAME_CASE(BLENDV)
- NODE_NAME_CASE(HADD)
- NODE_NAME_CASE(HSUB)
- NODE_NAME_CASE(FHADD)
- NODE_NAME_CASE(FHSUB)
- NODE_NAME_CASE(CONFLICT)
- NODE_NAME_CASE(FMAX)
- NODE_NAME_CASE(FMAXS)
- NODE_NAME_CASE(FMAX_SAE)
- NODE_NAME_CASE(FMAXS_SAE)
- NODE_NAME_CASE(STRICT_FMAX)
- NODE_NAME_CASE(FMIN)
- NODE_NAME_CASE(FMINS)
- NODE_NAME_CASE(FMIN_SAE)
- NODE_NAME_CASE(FMINS_SAE)
- NODE_NAME_CASE(STRICT_FMIN)
- NODE_NAME_CASE(FMAXC)
- NODE_NAME_CASE(FMINC)
- NODE_NAME_CASE(FRSQRT)
- NODE_NAME_CASE(FRCP)
- NODE_NAME_CASE(EXTRQI)
- NODE_NAME_CASE(INSERTQI)
- NODE_NAME_CASE(TLSADDR)
- NODE_NAME_CASE(TLSBASEADDR)
- NODE_NAME_CASE(TLSCALL)
- NODE_NAME_CASE(TLSDESC)
- NODE_NAME_CASE(EH_SJLJ_SETJMP)
- NODE_NAME_CASE(EH_SJLJ_LONGJMP)
- NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
- NODE_NAME_CASE(EH_RETURN)
- NODE_NAME_CASE(TC_RETURN)
- NODE_NAME_CASE(FNSTCW16m)
- NODE_NAME_CASE(FLDCW16m)
- NODE_NAME_CASE(FNSTENVm)
- NODE_NAME_CASE(FLDENVm)
- NODE_NAME_CASE(LCMPXCHG_DAG)
- NODE_NAME_CASE(LCMPXCHG8_DAG)
- NODE_NAME_CASE(LCMPXCHG16_DAG)
- NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
- NODE_NAME_CASE(LADD)
- NODE_NAME_CASE(LSUB)
- NODE_NAME_CASE(LOR)
- NODE_NAME_CASE(LXOR)
- NODE_NAME_CASE(LAND)
- NODE_NAME_CASE(LBTS)
- NODE_NAME_CASE(LBTC)
- NODE_NAME_CASE(LBTR)
- NODE_NAME_CASE(LBTS_RM)
- NODE_NAME_CASE(LBTC_RM)
- NODE_NAME_CASE(LBTR_RM)
- NODE_NAME_CASE(AADD)
- NODE_NAME_CASE(AOR)
- NODE_NAME_CASE(AXOR)
- NODE_NAME_CASE(AAND)
- NODE_NAME_CASE(VZEXT_MOVL)
- NODE_NAME_CASE(VZEXT_LOAD)
- NODE_NAME_CASE(VEXTRACT_STORE)
- NODE_NAME_CASE(VTRUNC)
- NODE_NAME_CASE(VTRUNCS)
- NODE_NAME_CASE(VTRUNCUS)
- NODE_NAME_CASE(VMTRUNC)
- NODE_NAME_CASE(VMTRUNCS)
- NODE_NAME_CASE(VMTRUNCUS)
- NODE_NAME_CASE(VTRUNCSTORES)
- NODE_NAME_CASE(VTRUNCSTOREUS)
- NODE_NAME_CASE(VMTRUNCSTORES)
- NODE_NAME_CASE(VMTRUNCSTOREUS)
- NODE_NAME_CASE(VFPEXT)
- NODE_NAME_CASE(STRICT_VFPEXT)
- NODE_NAME_CASE(VFPEXT_SAE)
- NODE_NAME_CASE(VFPEXTS)
- NODE_NAME_CASE(VFPEXTS_SAE)
- NODE_NAME_CASE(VFPROUND)
- NODE_NAME_CASE(VFPROUND2)
- NODE_NAME_CASE(VFPROUND2_RND)
- NODE_NAME_CASE(STRICT_VFPROUND)
- NODE_NAME_CASE(VMFPROUND)
- NODE_NAME_CASE(VFPROUND_RND)
- NODE_NAME_CASE(VFPROUNDS)
- NODE_NAME_CASE(VFPROUNDS_RND)
- NODE_NAME_CASE(VSHLDQ)
- NODE_NAME_CASE(VSRLDQ)
- NODE_NAME_CASE(VSHL)
- NODE_NAME_CASE(VSRL)
- NODE_NAME_CASE(VSRA)
- NODE_NAME_CASE(VSHLI)
- NODE_NAME_CASE(VSRLI)
- NODE_NAME_CASE(VSRAI)
- NODE_NAME_CASE(VSHLV)
- NODE_NAME_CASE(VSRLV)
- NODE_NAME_CASE(VSRAV)
- NODE_NAME_CASE(VROTLI)
- NODE_NAME_CASE(VROTRI)
- NODE_NAME_CASE(VPPERM)
- NODE_NAME_CASE(CMPP)
- NODE_NAME_CASE(STRICT_CMPP)
- NODE_NAME_CASE(PCMPEQ)
- NODE_NAME_CASE(PCMPGT)
- NODE_NAME_CASE(PHMINPOS)
- NODE_NAME_CASE(ADD)
- NODE_NAME_CASE(SUB)
- NODE_NAME_CASE(ADC)
- NODE_NAME_CASE(SBB)
- NODE_NAME_CASE(SMUL)
- NODE_NAME_CASE(UMUL)
- NODE_NAME_CASE(OR)
- NODE_NAME_CASE(XOR)
- NODE_NAME_CASE(AND)
- NODE_NAME_CASE(BEXTR)
- NODE_NAME_CASE(BEXTRI)
- NODE_NAME_CASE(BZHI)
- NODE_NAME_CASE(PDEP)
- NODE_NAME_CASE(PEXT)
- NODE_NAME_CASE(MUL_IMM)
- NODE_NAME_CASE(MOVMSK)
- NODE_NAME_CASE(PTEST)
- NODE_NAME_CASE(TESTP)
- NODE_NAME_CASE(KORTEST)
- NODE_NAME_CASE(KTEST)
- NODE_NAME_CASE(KADD)
- NODE_NAME_CASE(KSHIFTL)
- NODE_NAME_CASE(KSHIFTR)
- NODE_NAME_CASE(PACKSS)
- NODE_NAME_CASE(PACKUS)
- NODE_NAME_CASE(PALIGNR)
- NODE_NAME_CASE(VALIGN)
- NODE_NAME_CASE(VSHLD)
- NODE_NAME_CASE(VSHRD)
- NODE_NAME_CASE(PSHUFD)
- NODE_NAME_CASE(PSHUFHW)
- NODE_NAME_CASE(PSHUFLW)
- NODE_NAME_CASE(SHUFP)
- NODE_NAME_CASE(SHUF128)
- NODE_NAME_CASE(MOVLHPS)
- NODE_NAME_CASE(MOVHLPS)
- NODE_NAME_CASE(MOVDDUP)
- NODE_NAME_CASE(MOVSHDUP)
- NODE_NAME_CASE(MOVSLDUP)
- NODE_NAME_CASE(MOVSD)
- NODE_NAME_CASE(MOVSS)
- NODE_NAME_CASE(MOVSH)
- NODE_NAME_CASE(UNPCKL)
- NODE_NAME_CASE(UNPCKH)
- NODE_NAME_CASE(VBROADCAST)
- NODE_NAME_CASE(VBROADCAST_LOAD)
- NODE_NAME_CASE(VBROADCASTM)
- NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
- NODE_NAME_CASE(VPERMILPV)
- NODE_NAME_CASE(VPERMILPI)
- NODE_NAME_CASE(VPERM2X128)
- NODE_NAME_CASE(VPERMV)
- NODE_NAME_CASE(VPERMV3)
- NODE_NAME_CASE(VPERMI)
- NODE_NAME_CASE(VPTERNLOG)
- NODE_NAME_CASE(FP_TO_SINT_SAT)
- NODE_NAME_CASE(FP_TO_UINT_SAT)
- NODE_NAME_CASE(VFIXUPIMM)
- NODE_NAME_CASE(VFIXUPIMM_SAE)
- NODE_NAME_CASE(VFIXUPIMMS)
- NODE_NAME_CASE(VFIXUPIMMS_SAE)
- NODE_NAME_CASE(VRANGE)
- NODE_NAME_CASE(VRANGE_SAE)
- NODE_NAME_CASE(VRANGES)
- NODE_NAME_CASE(VRANGES_SAE)
- NODE_NAME_CASE(PMULUDQ)
- NODE_NAME_CASE(PMULDQ)
- NODE_NAME_CASE(PSADBW)
- NODE_NAME_CASE(DBPSADBW)
- NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
- NODE_NAME_CASE(VAARG_64)
- NODE_NAME_CASE(VAARG_X32)
- NODE_NAME_CASE(DYN_ALLOCA)
- NODE_NAME_CASE(MFENCE)
- NODE_NAME_CASE(SEG_ALLOCA)
- NODE_NAME_CASE(PROBED_ALLOCA)
- NODE_NAME_CASE(RDRAND)
- NODE_NAME_CASE(RDSEED)
- NODE_NAME_CASE(RDPKRU)
- NODE_NAME_CASE(WRPKRU)
- NODE_NAME_CASE(VPMADDUBSW)
- NODE_NAME_CASE(VPMADDWD)
- NODE_NAME_CASE(VPSHA)
- NODE_NAME_CASE(VPSHL)
- NODE_NAME_CASE(VPCOM)
- NODE_NAME_CASE(VPCOMU)
- NODE_NAME_CASE(VPERMIL2)
- NODE_NAME_CASE(FMSUB)
- NODE_NAME_CASE(STRICT_FMSUB)
- NODE_NAME_CASE(FNMADD)
- NODE_NAME_CASE(STRICT_FNMADD)
- NODE_NAME_CASE(FNMSUB)
- NODE_NAME_CASE(STRICT_FNMSUB)
- NODE_NAME_CASE(FMADDSUB)
- NODE_NAME_CASE(FMSUBADD)
- NODE_NAME_CASE(FMADD_RND)
- NODE_NAME_CASE(FNMADD_RND)
- NODE_NAME_CASE(FMSUB_RND)
- NODE_NAME_CASE(FNMSUB_RND)
- NODE_NAME_CASE(FMADDSUB_RND)
- NODE_NAME_CASE(FMSUBADD_RND)
- NODE_NAME_CASE(VFMADDC)
- NODE_NAME_CASE(VFMADDC_RND)
- NODE_NAME_CASE(VFCMADDC)
- NODE_NAME_CASE(VFCMADDC_RND)
- NODE_NAME_CASE(VFMULC)
- NODE_NAME_CASE(VFMULC_RND)
- NODE_NAME_CASE(VFCMULC)
- NODE_NAME_CASE(VFCMULC_RND)
- NODE_NAME_CASE(VFMULCSH)
- NODE_NAME_CASE(VFMULCSH_RND)
- NODE_NAME_CASE(VFCMULCSH)
- NODE_NAME_CASE(VFCMULCSH_RND)
- NODE_NAME_CASE(VFMADDCSH)
- NODE_NAME_CASE(VFMADDCSH_RND)
- NODE_NAME_CASE(VFCMADDCSH)
- NODE_NAME_CASE(VFCMADDCSH_RND)
- NODE_NAME_CASE(VPMADD52H)
- NODE_NAME_CASE(VPMADD52L)
- NODE_NAME_CASE(VRNDSCALE)
- NODE_NAME_CASE(STRICT_VRNDSCALE)
- NODE_NAME_CASE(VRNDSCALE_SAE)
- NODE_NAME_CASE(VRNDSCALES)
- NODE_NAME_CASE(VRNDSCALES_SAE)
- NODE_NAME_CASE(VREDUCE)
- NODE_NAME_CASE(VREDUCE_SAE)
- NODE_NAME_CASE(VREDUCES)
- NODE_NAME_CASE(VREDUCES_SAE)
- NODE_NAME_CASE(VGETMANT)
- NODE_NAME_CASE(VGETMANT_SAE)
- NODE_NAME_CASE(VGETMANTS)
- NODE_NAME_CASE(VGETMANTS_SAE)
- NODE_NAME_CASE(PCMPESTR)
- NODE_NAME_CASE(PCMPISTR)
- NODE_NAME_CASE(XTEST)
- NODE_NAME_CASE(COMPRESS)
- NODE_NAME_CASE(EXPAND)
- NODE_NAME_CASE(SELECTS)
- NODE_NAME_CASE(ADDSUB)
- NODE_NAME_CASE(RCP14)
- NODE_NAME_CASE(RCP14S)
- NODE_NAME_CASE(RSQRT14)
- NODE_NAME_CASE(RSQRT14S)
- NODE_NAME_CASE(FADD_RND)
- NODE_NAME_CASE(FADDS)
- NODE_NAME_CASE(FADDS_RND)
- NODE_NAME_CASE(FSUB_RND)
- NODE_NAME_CASE(FSUBS)
- NODE_NAME_CASE(FSUBS_RND)
- NODE_NAME_CASE(FMUL_RND)
- NODE_NAME_CASE(FMULS)
- NODE_NAME_CASE(FMULS_RND)
- NODE_NAME_CASE(FDIV_RND)
- NODE_NAME_CASE(FDIVS)
- NODE_NAME_CASE(FDIVS_RND)
- NODE_NAME_CASE(FSQRT_RND)
- NODE_NAME_CASE(FSQRTS)
- NODE_NAME_CASE(FSQRTS_RND)
- NODE_NAME_CASE(FGETEXP)
- NODE_NAME_CASE(FGETEXP_SAE)
- NODE_NAME_CASE(FGETEXPS)
- NODE_NAME_CASE(FGETEXPS_SAE)
- NODE_NAME_CASE(SCALEF)
- NODE_NAME_CASE(SCALEF_RND)
- NODE_NAME_CASE(SCALEFS)
- NODE_NAME_CASE(SCALEFS_RND)
- NODE_NAME_CASE(MULHRS)
- NODE_NAME_CASE(SINT_TO_FP_RND)
- NODE_NAME_CASE(UINT_TO_FP_RND)
- NODE_NAME_CASE(CVTTP2SI)
- NODE_NAME_CASE(CVTTP2UI)
- NODE_NAME_CASE(STRICT_CVTTP2SI)
- NODE_NAME_CASE(STRICT_CVTTP2UI)
- NODE_NAME_CASE(MCVTTP2SI)
- NODE_NAME_CASE(MCVTTP2UI)
- NODE_NAME_CASE(CVTTP2SI_SAE)
- NODE_NAME_CASE(CVTTP2UI_SAE)
- NODE_NAME_CASE(CVTTS2SI)
- NODE_NAME_CASE(CVTTS2UI)
- NODE_NAME_CASE(CVTTS2SI_SAE)
- NODE_NAME_CASE(CVTTS2UI_SAE)
- NODE_NAME_CASE(CVTSI2P)
- NODE_NAME_CASE(CVTUI2P)
- NODE_NAME_CASE(STRICT_CVTSI2P)
- NODE_NAME_CASE(STRICT_CVTUI2P)
- NODE_NAME_CASE(MCVTSI2P)
- NODE_NAME_CASE(MCVTUI2P)
- NODE_NAME_CASE(VFPCLASS)
- NODE_NAME_CASE(VFPCLASSS)
- NODE_NAME_CASE(MULTISHIFT)
- NODE_NAME_CASE(SCALAR_SINT_TO_FP)
- NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
- NODE_NAME_CASE(SCALAR_UINT_TO_FP)
- NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
- NODE_NAME_CASE(CVTPS2PH)
- NODE_NAME_CASE(STRICT_CVTPS2PH)
- NODE_NAME_CASE(CVTPS2PH_SAE)
- NODE_NAME_CASE(MCVTPS2PH)
- NODE_NAME_CASE(MCVTPS2PH_SAE)
- NODE_NAME_CASE(CVTPH2PS)
- NODE_NAME_CASE(STRICT_CVTPH2PS)
- NODE_NAME_CASE(CVTPH2PS_SAE)
- NODE_NAME_CASE(CVTP2SI)
- NODE_NAME_CASE(CVTP2UI)
- NODE_NAME_CASE(MCVTP2SI)
- NODE_NAME_CASE(MCVTP2UI)
- NODE_NAME_CASE(CVTP2SI_RND)
- NODE_NAME_CASE(CVTP2UI_RND)
- NODE_NAME_CASE(CVTS2SI)
- NODE_NAME_CASE(CVTS2UI)
- NODE_NAME_CASE(CVTS2SI_RND)
- NODE_NAME_CASE(CVTS2UI_RND)
- NODE_NAME_CASE(CVTNEPS2BF16)
- NODE_NAME_CASE(MCVTNEPS2BF16)
- NODE_NAME_CASE(DPBF16PS)
- NODE_NAME_CASE(DPFP16PS)
- NODE_NAME_CASE(MPSADBW)
- NODE_NAME_CASE(LWPINS)
- NODE_NAME_CASE(MGATHER)
- NODE_NAME_CASE(MSCATTER)
- NODE_NAME_CASE(VPDPBUSD)
- NODE_NAME_CASE(VPDPBUSDS)
- NODE_NAME_CASE(VPDPWSSD)
- NODE_NAME_CASE(VPDPWSSDS)
- NODE_NAME_CASE(VPSHUFBITQMB)
- NODE_NAME_CASE(GF2P8MULB)
- NODE_NAME_CASE(GF2P8AFFINEQB)
- NODE_NAME_CASE(GF2P8AFFINEINVQB)
- NODE_NAME_CASE(NT_CALL)
- NODE_NAME_CASE(NT_BRIND)
- NODE_NAME_CASE(UMWAIT)
- NODE_NAME_CASE(TPAUSE)
- NODE_NAME_CASE(ENQCMD)
- NODE_NAME_CASE(ENQCMDS)
- NODE_NAME_CASE(VP2INTERSECT)
- NODE_NAME_CASE(VPDPBSUD)
- NODE_NAME_CASE(VPDPBSUDS)
- NODE_NAME_CASE(VPDPBUUD)
- NODE_NAME_CASE(VPDPBUUDS)
- NODE_NAME_CASE(VPDPBSSD)
- NODE_NAME_CASE(VPDPBSSDS)
- NODE_NAME_CASE(VPDPWSUD)
- NODE_NAME_CASE(VPDPWSUDS)
- NODE_NAME_CASE(VPDPWUSD)
- NODE_NAME_CASE(VPDPWUSDS)
- NODE_NAME_CASE(VPDPWUUD)
- NODE_NAME_CASE(VPDPWUUDS)
- NODE_NAME_CASE(VMINMAX)
- NODE_NAME_CASE(VMINMAX_SAE)
- NODE_NAME_CASE(VMINMAXS)
- NODE_NAME_CASE(VMINMAXS_SAE)
- NODE_NAME_CASE(CVTP2IBS)
- NODE_NAME_CASE(CVTP2IUBS)
- NODE_NAME_CASE(CVTP2IBS_RND)
- NODE_NAME_CASE(CVTP2IUBS_RND)
- NODE_NAME_CASE(CVTTP2IBS)
- NODE_NAME_CASE(CVTTP2IUBS)
- NODE_NAME_CASE(CVTTP2IBS_SAE)
- NODE_NAME_CASE(CVTTP2IUBS_SAE)
- NODE_NAME_CASE(VCVT2PH2BF8)
- NODE_NAME_CASE(VCVT2PH2BF8S)
- NODE_NAME_CASE(VCVT2PH2HF8)
- NODE_NAME_CASE(VCVT2PH2HF8S)
- NODE_NAME_CASE(VCVTBIASPH2BF8)
- NODE_NAME_CASE(VCVTBIASPH2BF8S)
- NODE_NAME_CASE(VCVTBIASPH2HF8)
- NODE_NAME_CASE(VCVTBIASPH2HF8S)
- NODE_NAME_CASE(VCVTPH2BF8)
- NODE_NAME_CASE(VCVTPH2BF8S)
- NODE_NAME_CASE(VCVTPH2HF8)
- NODE_NAME_CASE(VCVTPH2HF8S)
- NODE_NAME_CASE(VMCVTBIASPH2BF8)
- NODE_NAME_CASE(VMCVTBIASPH2BF8S)
- NODE_NAME_CASE(VMCVTBIASPH2HF8)
- NODE_NAME_CASE(VMCVTBIASPH2HF8S)
- NODE_NAME_CASE(VMCVTPH2BF8)
- NODE_NAME_CASE(VMCVTPH2BF8S)
- NODE_NAME_CASE(VMCVTPH2HF8)
- NODE_NAME_CASE(VMCVTPH2HF8S)
- NODE_NAME_CASE(VCVTHF82PH)
- NODE_NAME_CASE(AESENC128KL)
- NODE_NAME_CASE(AESDEC128KL)
- NODE_NAME_CASE(AESENC256KL)
- NODE_NAME_CASE(AESDEC256KL)
- NODE_NAME_CASE(AESENCWIDE128KL)
- NODE_NAME_CASE(AESDECWIDE128KL)
- NODE_NAME_CASE(AESENCWIDE256KL)
- NODE_NAME_CASE(AESDECWIDE256KL)
- NODE_NAME_CASE(CMPCCXADD)
- NODE_NAME_CASE(TESTUI)
- NODE_NAME_CASE(FP80_ADD)
- NODE_NAME_CASE(STRICT_FP80_ADD)
- NODE_NAME_CASE(CCMP)
- NODE_NAME_CASE(CTEST)
- NODE_NAME_CASE(CLOAD)
- NODE_NAME_CASE(CSTORE)
- NODE_NAME_CASE(CVTTS2SIS)
- NODE_NAME_CASE(CVTTS2UIS)
- NODE_NAME_CASE(CVTTS2SIS_SAE)
- NODE_NAME_CASE(CVTTS2UIS_SAE)
- NODE_NAME_CASE(CVTTP2SIS)
- NODE_NAME_CASE(MCVTTP2SIS)
- NODE_NAME_CASE(CVTTP2UIS_SAE)
- NODE_NAME_CASE(CVTTP2SIS_SAE)
- NODE_NAME_CASE(CVTTP2UIS)
- NODE_NAME_CASE(MCVTTP2UIS)
- NODE_NAME_CASE(POP_FROM_X87_REG)
- }
- return nullptr;
-#undef NODE_NAME_CASE
-}
-
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b7151f65942b4..4365bc0075fdc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
+#include "X86SelectionDAGInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -21,984 +22,6 @@ namespace llvm {
class X86Subtarget;
class X86TargetMachine;
- namespace X86ISD {
- // X86 Specific DAG Nodes
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- /// Bit scan forward.
- BSF,
- /// Bit scan reverse.
- BSR,
-
- /// X86 funnel/double shift i16 instructions. These correspond to
- /// X86::SHLDW and X86::SHRDW instructions which have different amt
- /// modulo rules to generic funnel shifts.
- /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
- FSHL,
- FSHR,
-
- /// Bitwise logical AND of floating point values. This corresponds
- /// to X86::ANDPS or X86::ANDPD.
- FAND,
-
- /// Bitwise logical OR of floating point values. This corresponds
- /// to X86::ORPS or X86::ORPD.
- FOR,
-
- /// Bitwise logical XOR of floating point values. This corresponds
- /// to X86::XORPS or X86::XORPD.
- FXOR,
-
- /// Bitwise logical ANDNOT of floating point values. This
- /// corresponds to X86::ANDNPS or X86::ANDNPD.
- FANDN,
-
- /// These operations represent an abstract X86 call
- /// instruction, which includes a bunch of information. In particular the
- /// operands of these node are:
- ///
- /// #0 - The incoming token chain
- /// #1 - The callee
- /// #2 - The number of arg bytes the caller pushes on the stack.
- /// #3 - The number of arg bytes the callee pops off the stack.
- /// #4 - The value to pass in AL/AX/EAX (optional)
- /// #5 - The value to pass in DL/DX/EDX (optional)
- ///
- /// The result values of these nodes are:
- ///
- /// #0 - The outgoing token chain
- /// #1 - The first register result value (optional)
- /// #2 - The second register result value (optional)
- ///
- CALL,
-
- /// Same as call except it adds the NoTrack prefix.
- NT_CALL,
-
- // Pseudo for a OBJC call that gets emitted together with a special
- // marker instruction.
- CALL_RVMARKER,
-
- /// The same as ISD::CopyFromReg except that this node makes it explicit
- /// that it may lower to an x87 FPU stack pop. Optimizations should be more
- /// cautious when handling this node than a normal CopyFromReg to avoid
- /// removing a required FPU stack pop. A key requirement is optimizations
- /// should not optimize any users of a chain that contains a
- /// POP_FROM_X87_REG to use a chain from a point earlier than the
- /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
- POP_FROM_X87_REG,
-
- // Pseudo for a call to an imported function to ensure the correct machine
- // instruction is emitted for Import Call Optimization.
- IMP_CALL,
-
- /// X86 compare and logical compare instructions.
- CMP,
- FCMP,
- COMI,
- UCOMI,
-
- // X86 compare with Intrinsics similar to COMI.
- COMX,
- UCOMX,
-
- /// X86 bit-test instructions.
- BT,
-
- /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
- /// operand, usually produced by a CMP instruction.
- SETCC,
-
- /// X86 Select
- SELECTS,
-
- // Same as SETCC except it's materialized with a sbb and the value is all
- // one's or all zero's.
- SETCC_CARRY, // R = carry_bit ? ~0 : 0
-
- /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
- /// Operands are two FP values to compare; result is a mask of
- /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
- FSETCC,
-
- /// X86 FP SETCC, similar to above, but with output as an i1 mask and
- /// and a version with SAE.
- FSETCCM,
- FSETCCM_SAE,
-
- /// X86 conditional moves. Operand 0 and operand 1 are the two values
- /// to select from. Operand 2 is the condition code, and operand 3 is the
- /// flag operand produced by a CMP or TEST instruction.
- CMOV,
-
- /// X86 conditional branches. Operand 0 is the chain operand, operand 1
- /// is the block to branch if condition is true, operand 2 is the
- /// condition code, and operand 3 is the flag operand produced by a CMP
- /// or TEST instruction.
- BRCOND,
-
- /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
- /// operand 1 is the target address.
- NT_BRIND,
-
- /// Return with a glue operand. Operand 0 is the chain operand, operand
- /// 1 is the number of bytes of stack to pop.
- RET_GLUE,
-
- /// Return from interrupt. Operand 0 is the number of bytes to pop.
- IRET,
-
- /// Repeat fill, corresponds to X86::REP_STOSx.
- REP_STOS,
-
- /// Repeat move, corresponds to X86::REP_MOVSx.
- REP_MOVS,
-
- /// On Darwin, this node represents the result of the popl
- /// at function entry, used for PIC code.
- GlobalBaseReg,
-
- /// A wrapper node for TargetConstantPool, TargetJumpTable,
- /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
- /// MCSymbol and TargetBlockAddress.
- Wrapper,
-
- /// Special wrapper used under X86-64 PIC mode for RIP
- /// relative displacements.
- WrapperRIP,
-
- /// Copies a 64-bit value from an MMX vector to the low word
- /// of an XMM vector, with the high word zero filled.
- MOVQ2DQ,
-
- /// Copies a 64-bit value from the low word of an XMM vector
- /// to an MMX vector.
- MOVDQ2Q,
-
- /// Copies a 32-bit value from the low word of a MMX
- /// vector to a GPR.
- MMX_MOVD2W,
-
- /// Copies a GPR into the low 32-bit word of a MMX vector
- /// and zero out the high word.
- MMX_MOVW2D,
-
- /// Extract an 8-bit value from a vector and zero extend it to
- /// i32, corresponds to X86::PEXTRB.
- PEXTRB,
-
- /// Extract a 16-bit value from a vector and zero extend it to
- /// i32, corresponds to X86::PEXTRW.
- PEXTRW,
-
- /// Insert any element of a 4 x float vector into any element
- /// of a destination 4 x floatvector.
- INSERTPS,
-
- /// Insert the lower 8-bits of a 32-bit value to a vector,
- /// corresponds to X86::PINSRB.
- PINSRB,
-
- /// Insert the lower 16-bits of a 32-bit value to a vector,
- /// corresponds to X86::PINSRW.
- PINSRW,
-
- /// Shuffle 16 8-bit values within a vector.
- PSHUFB,
-
- /// Compute Sum of Absolute Differences.
- PSADBW,
- /// Compute Double Block Packed Sum-Absolute-Differences
- DBPSADBW,
-
- /// Bitwise Logical AND NOT of Packed FP values.
- ANDNP,
-
- /// Blend where the selector is an immediate.
- BLENDI,
-
- /// Dynamic (non-constant condition) vector blend where only the sign bits
- /// of the condition elements are used. This is used to enforce that the
- /// condition mask is not valid for generic VSELECT optimizations. This
- /// is also used to implement the intrinsics.
- /// Operands are in VSELECT order: MASK, TRUE, FALSE
- BLENDV,
-
- /// Combined add and sub on an FP vector.
- ADDSUB,
-
- // FP vector ops with rounding mode.
- FADD_RND,
- FADDS,
- FADDS_RND,
- FSUB_RND,
- FSUBS,
- FSUBS_RND,
- FMUL_RND,
- FMULS,
- FMULS_RND,
- FDIV_RND,
- FDIVS,
- FDIVS_RND,
- FMAX_SAE,
- FMAXS_SAE,
- FMIN_SAE,
- FMINS_SAE,
- FSQRT_RND,
- FSQRTS,
- FSQRTS_RND,
-
- // FP vector get exponent.
- FGETEXP,
- FGETEXP_SAE,
- FGETEXPS,
- FGETEXPS_SAE,
- // Extract Normalized Mantissas.
- VGETMANT,
- VGETMANT_SAE,
- VGETMANTS,
- VGETMANTS_SAE,
- // FP Scale.
- SCALEF,
- SCALEF_RND,
- SCALEFS,
- SCALEFS_RND,
-
- /// Integer horizontal add/sub.
- HADD,
- HSUB,
-
- /// Floating point horizontal add/sub.
- FHADD,
- FHSUB,
-
- // Detect Conflicts Within a Vector
- CONFLICT,
-
- /// Floating point max and min.
- FMAX,
- FMIN,
-
- /// Commutative FMIN and FMAX.
- FMAXC,
- FMINC,
-
- /// Scalar intrinsic floating point max and min.
- FMAXS,
- FMINS,
-
- /// Floating point reciprocal-sqrt and reciprocal approximation.
- /// Note that these typically require refinement
- /// in order to obtain suitable precision.
- FRSQRT,
- FRCP,
-
- // AVX-512 reciprocal approximations with a little more precision.
- RSQRT14,
- RSQRT14S,
- RCP14,
- RCP14S,
-
- // Thread Local Storage.
- TLSADDR,
-
- // Thread Local Storage. A call to get the start address
- // of the TLS block for the current module.
- TLSBASEADDR,
-
- // Thread Local Storage. When calling to an OS provided
- // thunk at the address from an earlier relocation.
- TLSCALL,
-
- // Thread Local Storage. A descriptor containing pointer to
- // code and to argument to get the TLS offset for the symbol.
- TLSDESC,
-
- // Exception Handling helpers.
- EH_RETURN,
-
- // SjLj exception handling setjmp.
- EH_SJLJ_SETJMP,
-
- // SjLj exception handling longjmp.
- EH_SJLJ_LONGJMP,
-
- // SjLj exception handling dispatch.
- EH_SJLJ_SETUP_DISPATCH,
-
- /// Tail call return. See X86TargetLowering::LowerCall for
- /// the list of operands.
- TC_RETURN,
-
- // Vector move to low scalar and zero higher vector elements.
- VZEXT_MOVL,
-
- // Vector integer truncate.
- VTRUNC,
- // Vector integer truncate with unsigned/signed saturation.
- VTRUNCUS,
- VTRUNCS,
-
- // Masked version of the above. Used when less than a 128-bit result is
- // produced since the mask only applies to the lower elements and can't
- // be represented by a select.
- // SRC, PASSTHRU, MASK
- VMTRUNC,
- VMTRUNCUS,
- VMTRUNCS,
-
- // Vector FP extend.
- VFPEXT,
- VFPEXT_SAE,
- VFPEXTS,
- VFPEXTS_SAE,
-
- // Vector FP round.
- VFPROUND,
- // Convert TWO packed single data to one packed data
- VFPROUND2,
- VFPROUND2_RND,
- VFPROUND_RND,
- VFPROUNDS,
- VFPROUNDS_RND,
-
- // Masked version of above. Used for v2f64->v4f32.
- // SRC, PASSTHRU, MASK
- VMFPROUND,
-
- // 128-bit vector logical left / right shift
- VSHLDQ,
- VSRLDQ,
-
- // Vector shift elements
- VSHL,
- VSRL,
- VSRA,
-
- // Vector variable shift
- VSHLV,
- VSRLV,
- VSRAV,
-
- // Vector shift elements by immediate
- VSHLI,
- VSRLI,
- VSRAI,
-
- // Shifts of mask registers.
- KSHIFTL,
- KSHIFTR,
-
- // Bit rotate by immediate
- VROTLI,
- VROTRI,
-
- // Vector packed double/float comparison.
- CMPP,
-
- // Vector integer comparisons.
- PCMPEQ,
- PCMPGT,
-
- // v8i16 Horizontal minimum and position.
- PHMINPOS,
-
- MULTISHIFT,
-
- /// Vector comparison generating mask bits for fp and
- /// integer signed and unsigned data types.
- CMPM,
- // Vector mask comparison generating mask bits for FP values.
- CMPMM,
- // Vector mask comparison with SAE for FP values.
- CMPMM_SAE,
-
- // Arithmetic operations with FLAGS results.
- ADD,
- SUB,
- ADC,
- SBB,
- SMUL,
- UMUL,
- OR,
- XOR,
- AND,
-
- // Bit field extract.
- BEXTR,
- BEXTRI,
-
- // Zero High Bits Starting with Specified Bit Position.
- BZHI,
-
- // Parallel extract and deposit.
- PDEP,
- PEXT,
-
- // X86-specific multiply by immediate.
- MUL_IMM,
-
- // Vector sign bit extraction.
- MOVMSK,
-
- // Vector bitwise comparisons.
- PTEST,
-
- // Vector packed fp sign bitwise comparisons.
- TESTP,
-
- // OR/AND test for masks.
- KORTEST,
- KTEST,
-
- // ADD for masks.
- KADD,
-
- // Several flavors of instructions with vector shuffle behaviors.
- // Saturated signed/unnsigned packing.
- PACKSS,
- PACKUS,
- // Intra-lane alignr.
- PALIGNR,
- // AVX512 inter-lane alignr.
- VALIGN,
- PSHUFD,
- PSHUFHW,
- PSHUFLW,
- SHUFP,
- // VBMI2 Concat & Shift.
- VSHLD,
- VSHRD,
-
- // Shuffle Packed Values at 128-bit granularity.
- SHUF128,
- MOVDDUP,
- MOVSHDUP,
- MOVSLDUP,
- MOVLHPS,
- MOVHLPS,
- MOVSD,
- MOVSS,
- MOVSH,
- UNPCKL,
- UNPCKH,
- VPERMILPV,
- VPERMILPI,
- VPERMI,
- VPERM2X128,
-
- // Variable Permute (VPERM).
- // Res = VPERMV MaskV, V0
- VPERMV,
-
- // 3-op Variable Permute (VPERMT2).
- // Res = VPERMV3 V0, MaskV, V1
- VPERMV3,
-
- // Bitwise ternary logic.
- VPTERNLOG,
- // Fix Up Special Packed Float32/64 values.
- VFIXUPIMM,
- VFIXUPIMM_SAE,
- VFIXUPIMMS,
- VFIXUPIMMS_SAE,
- // Range Restriction Calculation For Packed Pairs of Float32/64 values.
- VRANGE,
- VRANGE_SAE,
- VRANGES,
- VRANGES_SAE,
- // Reduce - Perform Reduction Transformation on scalar\packed FP.
- VREDUCE,
- VREDUCE_SAE,
- VREDUCES,
- VREDUCES_SAE,
- // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
- // Also used by the legacy (V)ROUND intrinsics where we mask out the
- // scaling part of the immediate.
- VRNDSCALE,
- VRNDSCALE_SAE,
- VRNDSCALES,
- VRNDSCALES_SAE,
- // Tests Types Of a FP Values for packed types.
- VFPCLASS,
- // Tests Types Of a FP Values for scalar types.
- VFPCLASSS,
-
- // Broadcast (splat) scalar or element 0 of a vector. If the operand is
- // a vector, this node may change the vector length as part of the splat.
- VBROADCAST,
- // Broadcast mask to vector.
- VBROADCASTM,
-
- /// SSE4A Extraction and Insertion.
- EXTRQI,
- INSERTQI,
-
- // XOP arithmetic/logical shifts.
- VPSHA,
- VPSHL,
- // XOP signed/unsigned integer comparisons.
- VPCOM,
- VPCOMU,
- // XOP packed permute bytes.
- VPPERM,
- // XOP two source permutation.
- VPERMIL2,
-
- // Vector multiply packed unsigned doubleword integers.
- PMULUDQ,
- // Vector multiply packed signed doubleword integers.
- PMULDQ,
- // Vector Multiply Packed UnsignedIntegers with Round and Scale.
- MULHRS,
-
- // Multiply and Add Packed Integers.
- VPMADDUBSW,
- VPMADDWD,
-
- // AVX512IFMA multiply and add.
- // NOTE: These are different than the instruction and perform
- // op0 x op1 + op2.
- VPMADD52L,
- VPMADD52H,
-
- // VNNI
- VPDPBUSD,
- VPDPBUSDS,
- VPDPWSSD,
- VPDPWSSDS,
-
- // FMA nodes.
- // We use the target independent ISD::FMA for the non-inverted case.
- FNMADD,
- FMSUB,
- FNMSUB,
- FMADDSUB,
- FMSUBADD,
-
- // FMA with rounding mode.
- FMADD_RND,
- FNMADD_RND,
- FMSUB_RND,
- FNMSUB_RND,
- FMADDSUB_RND,
- FMSUBADD_RND,
-
- // AVX512-FP16 complex addition and multiplication.
- VFMADDC,
- VFMADDC_RND,
- VFCMADDC,
- VFCMADDC_RND,
-
- VFMULC,
- VFMULC_RND,
- VFCMULC,
- VFCMULC_RND,
-
- VFMADDCSH,
- VFMADDCSH_RND,
- VFCMADDCSH,
- VFCMADDCSH_RND,
-
- VFMULCSH,
- VFMULCSH_RND,
- VFCMULCSH,
- VFCMULCSH_RND,
-
- VPDPBSUD,
- VPDPBSUDS,
- VPDPBUUD,
- VPDPBUUDS,
- VPDPBSSD,
- VPDPBSSDS,
-
- VPDPWSUD,
- VPDPWSUDS,
- VPDPWUSD,
- VPDPWUSDS,
- VPDPWUUD,
- VPDPWUUDS,
-
- VMINMAX,
- VMINMAX_SAE,
- VMINMAXS,
- VMINMAXS_SAE,
-
- CVTP2IBS,
- CVTP2IUBS,
- CVTP2IBS_RND,
- CVTP2IUBS_RND,
- CVTTP2IBS,
- CVTTP2IUBS,
- CVTTP2IBS_SAE,
- CVTTP2IUBS_SAE,
-
- MPSADBW,
-
- VCVT2PH2BF8,
- VCVT2PH2BF8S,
- VCVT2PH2HF8,
- VCVT2PH2HF8S,
- VCVTBIASPH2BF8,
- VCVTBIASPH2BF8S,
- VCVTBIASPH2HF8,
- VCVTBIASPH2HF8S,
- VCVTPH2BF8,
- VCVTPH2BF8S,
- VCVTPH2HF8,
- VCVTPH2HF8S,
- VMCVTBIASPH2BF8,
- VMCVTBIASPH2BF8S,
- VMCVTBIASPH2HF8,
- VMCVTBIASPH2HF8S,
- VMCVTPH2BF8,
- VMCVTPH2BF8S,
- VMCVTPH2HF8,
- VMCVTPH2HF8S,
- VCVTHF82PH,
-
- // Compress and expand.
- COMPRESS,
- EXPAND,
-
- // Bits shuffle
- VPSHUFBITQMB,
-
- // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
- SINT_TO_FP_RND,
- UINT_TO_FP_RND,
- SCALAR_SINT_TO_FP,
- SCALAR_UINT_TO_FP,
- SCALAR_SINT_TO_FP_RND,
- SCALAR_UINT_TO_FP_RND,
-
- // Vector float/double to signed/unsigned integer.
- CVTP2SI,
- CVTP2UI,
- CVTP2SI_RND,
- CVTP2UI_RND,
- // Scalar float/double to signed/unsigned integer.
- CVTS2SI,
- CVTS2UI,
- CVTS2SI_RND,
- CVTS2UI_RND,
-
- // Vector float/double to signed/unsigned integer with truncation.
- CVTTP2SI,
- CVTTP2UI,
- CVTTP2SI_SAE,
- CVTTP2UI_SAE,
-
- // Saturation enabled Vector float/double to signed/unsigned
- // integer with truncation.
- CVTTP2SIS,
- CVTTP2UIS,
- CVTTP2SIS_SAE,
- CVTTP2UIS_SAE,
- // Masked versions of above. Used for v2f64 to v4i32.
- // SRC, PASSTHRU, MASK
- MCVTTP2SIS,
- MCVTTP2UIS,
-
- // Scalar float/double to signed/unsigned integer with truncation.
- CVTTS2SI,
- CVTTS2UI,
- CVTTS2SI_SAE,
- CVTTS2UI_SAE,
-
- // Vector signed/unsigned integer to float/double.
- CVTSI2P,
- CVTUI2P,
-
- // Scalar float/double to signed/unsigned integer with saturation.
- CVTTS2SIS,
- CVTTS2UIS,
- CVTTS2SIS_SAE,
- CVTTS2UIS_SAE,
-
- // Masked versions of above. Used for v2f64->v4f32.
- // SRC, PASSTHRU, MASK
- MCVTP2SI,
- MCVTP2UI,
- MCVTTP2SI,
- MCVTTP2UI,
- MCVTSI2P,
- MCVTUI2P,
-
- // Custom handling for FP_TO_xINT_SAT
- FP_TO_SINT_SAT,
- FP_TO_UINT_SAT,
-
- // Vector float to bfloat16.
- // Convert packed single data to packed BF16 data
- CVTNEPS2BF16,
- // Masked version of above.
- // SRC, PASSTHRU, MASK
- MCVTNEPS2BF16,
-
- // Dot product of BF16/FP16 pairs to accumulated into
- // packed single precision.
- DPBF16PS,
- DPFP16PS,
-
- // A stack checking function call. On Windows it's _chkstk call.
- DYN_ALLOCA,
-
- // For allocating variable amounts of stack space when using
- // segmented stacks. Check if the current stacklet has enough space, and
- // falls back to heap allocation if not.
- SEG_ALLOCA,
-
- // For allocating stack space when using stack clash protector.
- // Allocation is performed by block, and each block is probed.
- PROBED_ALLOCA,
-
- // Memory barriers.
- MFENCE,
-
- // Get a random integer and indicate whether it is valid in CF.
- RDRAND,
-
- // Get a NIST SP800-90B & C compliant random integer and
- // indicate whether it is valid in CF.
- RDSEED,
-
- // Protection keys
- // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
- // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
- // value for ECX.
- RDPKRU,
- WRPKRU,
-
- // SSE42 string comparisons.
- // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
- // will emit one or two instructions based on which results are used. If
- // flags and index/mask this allows us to use a single instruction since
- // we won't have to pick and opcode for flags. Instead we can rely on the
- // DAG to CSE everything and decide at isel.
- PCMPISTR,
- PCMPESTR,
-
- // Test if in transactional execution.
- XTEST,
-
- // Conversions between float and half-float.
- CVTPS2PH,
- CVTPS2PH_SAE,
- CVTPH2PS,
- CVTPH2PS_SAE,
-
- // Masked version of above.
- // SRC, RND, PASSTHRU, MASK
- MCVTPS2PH,
- MCVTPS2PH_SAE,
-
- // Galois Field Arithmetic Instructions
- GF2P8AFFINEINVQB,
- GF2P8AFFINEQB,
- GF2P8MULB,
-
- // LWP insert record.
- LWPINS,
-
- // User level wait
- UMWAIT,
- TPAUSE,
-
- // Enqueue Stores Instructions
- ENQCMD,
- ENQCMDS,
-
- // For avx512-vp2intersect
- VP2INTERSECT,
-
- // User level interrupts - testui
- TESTUI,
-
- // Perform an FP80 add after changing precision control in FPCW.
- FP80_ADD,
-
- // Conditional compare instructions
- CCMP,
- CTEST,
-
- /// X86 strict FP compare instructions.
- FIRST_STRICTFP_OPCODE,
- STRICT_FCMP = FIRST_STRICTFP_OPCODE,
- STRICT_FCMPS,
-
- // Vector packed double/float comparison.
- STRICT_CMPP,
-
- /// Vector comparison generating mask bits for fp and
- /// integer signed and unsigned data types.
- STRICT_CMPM,
-
- // Vector float/double to signed/unsigned integer with truncation.
- STRICT_CVTTP2SI,
- STRICT_CVTTP2UI,
-
- // Vector FP extend.
- STRICT_VFPEXT,
-
- // Vector FP round.
- STRICT_VFPROUND,
-
- // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
- // Also used by the legacy (V)ROUND intrinsics where we mask out the
- // scaling part of the immediate.
- STRICT_VRNDSCALE,
-
- // Vector signed/unsigned integer to float/double.
- STRICT_CVTSI2P,
- STRICT_CVTUI2P,
-
- // Strict FMA nodes.
- STRICT_FNMADD,
- STRICT_FMSUB,
- STRICT_FNMSUB,
-
- // Conversions between float and half-float.
- STRICT_CVTPS2PH,
- STRICT_CVTPH2PS,
-
- // Perform an FP80 add after changing precision control in FPCW.
- STRICT_FP80_ADD,
-
- /// Floating point max and min.
- STRICT_FMAX,
- STRICT_FMIN,
- LAST_STRICTFP_OPCODE = STRICT_FMIN,
-
- // Compare and swap.
- FIRST_MEMORY_OPCODE,
- LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
- LCMPXCHG8_DAG,
- LCMPXCHG16_DAG,
- LCMPXCHG16_SAVE_RBX_DAG,
-
- /// LOCK-prefixed arithmetic read-modify-write instructions.
- /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
- LADD,
- LSUB,
- LOR,
- LXOR,
- LAND,
- LBTS,
- LBTC,
- LBTR,
- LBTS_RM,
- LBTC_RM,
- LBTR_RM,
-
- /// RAO arithmetic instructions.
- /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
- AADD,
- AOR,
- AXOR,
- AAND,
-
- // Load, scalar_to_vector, and zero extend.
- VZEXT_LOAD,
-
- // extract_vector_elt, store.
- VEXTRACT_STORE,
-
- // scalar broadcast from memory.
- VBROADCAST_LOAD,
-
- // subvector broadcast from memory.
- SUBV_BROADCAST_LOAD,
-
- // Store FP control word into i16 memory.
- FNSTCW16m,
-
- // Load FP control word from i16 memory.
- FLDCW16m,
-
- // Store x87 FPU environment into memory.
- FNSTENVm,
-
- // Load x87 FPU environment from memory.
- FLDENVm,
-
- /// This instruction implements FP_TO_SINT with the
- /// integer destination in memory and a FP reg source. This corresponds
- /// to the X86::FIST*m instructions and the rounding mode change stuff. It
- /// has two inputs (token chain and address) and two outputs (int value
- /// and token chain). Memory VT specifies the type to store to.
- FP_TO_INT_IN_MEM,
-
- /// This instruction implements SINT_TO_FP with the
- /// integer source in memory and FP reg result. This corresponds to the
- /// X86::FILD*m instructions. It has two inputs (token chain and address)
- /// and two outputs (FP value and token chain). The integer source type is
- /// specified by the memory VT.
- FILD,
-
- /// This instruction implements a fp->int store from FP stack
- /// slots. This corresponds to the fist instruction. It takes a
- /// chain operand, value to store, address, and glue. The memory VT
- /// specifies the type to store as.
- FIST,
-
- /// This instruction implements an extending load to FP stack slots.
- /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
- /// operand, and ptr to load from. The memory VT specifies the type to
- /// load from.
- FLD,
-
- /// This instruction implements a truncating store from FP stack
- /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
- /// chain operand, value to store, address, and glue. The memory VT
- /// specifies the type to store as.
- FST,
-
- /// These instructions grab the address of the next argument
- /// from a va_list. (reads and modifies the va_list in memory)
- VAARG_64,
- VAARG_X32,
-
- // Vector truncating store with unsigned/signed saturation
- VTRUNCSTOREUS,
- VTRUNCSTORES,
- // Vector truncating masked store with unsigned/signed saturation
- VMTRUNCSTOREUS,
- VMTRUNCSTORES,
-
- // X86 specific gather and scatter
- MGATHER,
- MSCATTER,
-
- // Key locker nodes that produce flags.
- AESENC128KL,
- AESDEC128KL,
- AESENC256KL,
- AESDEC256KL,
- AESENCWIDE128KL,
- AESDECWIDE128KL,
- AESENCWIDE256KL,
- AESDECWIDE256KL,
-
- /// Compare and Add if Condition is Met. Compare value in operand 2 with
- /// value in memory of operand 1. If condition of operand 4 is met, add
- /// value operand 3 to m32 and write new value in operand 1. Operand 2 is
- /// always updated with the original value from operand 1.
- CMPCCXADD,
-
- // Save xmm argument registers to the stack, according to %al. An operator
- // is needed so that this can be expanded with control flow.
- VASTART_SAVE_XMM_REGS,
-
- // Conditional load/store instructions
- CLOAD,
- CSTORE,
- LAST_MEMORY_OPCODE = CSTORE,
- };
- } // end namespace X86ISD
-
namespace X86 {
/// Current rounding mode is represented in bits 11:10 of FPSR. These
/// values are same as corresponding constants for rounding mode used
@@ -1186,9 +209,6 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
- /// This method returns the name of a target specific DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
/// Do not merge vector stores after legalization because that may conflict
/// with x86-specific store splitting optimizations.
bool mergeStoresAfterLegalization(EVT MemVT) const override {
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index a61bbe56d9c26..4e3119f542c4c 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -942,10 +942,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (Glue.getNode())
RetOps.push_back(Glue);
- X86ISD::NodeType opcode = X86ISD::RET_GLUE;
+ unsigned RetOpcode = X86ISD::RET_GLUE;
if (CallConv == CallingConv::X86_INTR)
- opcode = X86ISD::IRET;
- return DAG.getNode(opcode, dl, MVT::Other, RetOps);
+ RetOpcode = X86ISD::IRET;
+ return DAG.getNode(RetOpcode, dl, MVT::Other, RetOps);
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index 116986a0fffea..fcf791b1a1f0f 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -141,8 +141,12 @@ def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>;
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>;
-def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
-def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
+
+let IsStrictFP = true in {
+ def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
+ def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
+}
+
def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>;
@@ -790,8 +794,11 @@ def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
+
+let IsStrictFP = true in
def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
[SDNPHasChain,SDNPCommutative]>;
+
def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs),
[(X86strict_fp80_add node:$lhs, node:$rhs),
(X86fp80_add node:$lhs, node:$rhs)]>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5321ecf0c1b2c..35c4d89a1b231 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -46,10 +46,12 @@ def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
-def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp,
- [SDNPHasChain]>;
-def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp,
- [SDNPHasChain]>;
+let IsStrictFP = true in {
+ def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp,
+ [SDNPHasChain]>;
+ def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp,
+ [SDNPHasChain]>;
+}
def X86any_fmin : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_fmin node:$src1, node:$src2),
@@ -146,6 +148,7 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>]>>;
+let IsStrictFP = true in
def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>]>,
@@ -165,6 +168,7 @@ def X86vfpround2 : SDNode<"X86ISD::VFPROUND2",
SDTCisSameAs<1, 2>,
SDTCisOpSmallerThanOp<0, 1>]>>;
+let IsStrictFP = true in
def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>,
@@ -215,7 +219,10 @@ def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
+
+let IsStrictFP = true in
def X86strict_cmpp : SDNode<"X86ISD::STRICT_CMPP", SDTX86VFCMP, [SDNPHasChain]>;
+
def X86any_cmpp : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_cmpp node:$src1, node:$src2, node:$src3),
(X86cmpp node:$src1, node:$src2, node:$src3)]>;
@@ -235,7 +242,10 @@ def X86CmpMaskCCScalar :
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
def X86cmpmm : SDNode<"X86ISD::CMPMM", X86MaskCmpMaskCC>;
+
+let IsStrictFP = true in
def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>;
+
def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_cmpm node:$src1, node:$src2, node:$src3),
(X86cmpm node:$src1, node:$src2, node:$src3)]>;
@@ -494,8 +504,11 @@ def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>;
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
+
+let IsStrictFP = true in
def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm,
[SDNPHasChain]>;
+
def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_VRndScale node:$src1, node:$src2),
(X86VRndScale node:$src1, node:$src2)]>;
@@ -554,17 +567,26 @@ def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
+
+let IsStrictFP = true in
def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+
def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fnmadd node:$src1, node:$src2, node:$src3),
(X86Fnmadd node:$src1, node:$src2, node:$src3)]>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+
+let IsStrictFP = true in
def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+
def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fmsub node:$src1, node:$src2, node:$src3),
(X86Fmsub node:$src1, node:$src2, node:$src3)]>;
def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+
+let IsStrictFP = true in
def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+
def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fnmsub node:$src1, node:$src2, node:$src3),
(X86Fnmsub node:$src1, node:$src2, node:$src3)]>;
@@ -709,8 +731,12 @@ def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
// cvtt fp-to-int staff
def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>;
def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>;
-def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>;
-def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>;
+
+let IsStrictFP = true in {
+ def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>;
+ def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>;
+}
+
def X86any_cvttp2si : PatFrags<(ops node:$src),
[(X86strict_cvttp2si node:$src),
(X86cvttp2si node:$src)]>;
@@ -720,8 +746,12 @@ def X86any_cvttp2ui : PatFrags<(ops node:$src),
def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>;
def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>;
-def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>;
-def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>;
+
+let IsStrictFP = true in {
+ def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>;
+ def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>;
+}
+
def X86any_VSintToFP : PatFrags<(ops node:$src),
[(X86strict_VSintToFP node:$src),
(X86VSintToFP node:$src)]>;
@@ -761,8 +791,11 @@ def X86mcvttp2uis : SDNode<"X86ISD::MCVTTP2UIS", SDTMFloatToInt>;
def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>]>;
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>;
+
+let IsStrictFP = true in
def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps,
[SDNPHasChain]>;
+
def X86any_cvtph2ps : PatFrags<(ops node:$src),
[(X86strict_cvtph2ps node:$src),
(X86cvtph2ps node:$src)]>;
@@ -773,8 +806,11 @@ def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>]>;
def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>;
+
+let IsStrictFP = true in
def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph,
[SDNPHasChain]>;
+
def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_cvtps2ph node:$src1, node:$src2),
(X86cvtps2ph node:$src1, node:$src2)]>;
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index aba62c36546f9..11e45616653ac 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "X86SelectionDAGInfo.h"
-#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
@@ -19,6 +18,9 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
+#define GET_SDNODE_DESC
+#include "X86GenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "x86-selectiondag-info"
@@ -27,14 +29,77 @@ static cl::opt<bool>
UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
cl::desc("Use fast short rep mov in memcpy lowering"));
+X86SelectionDAGInfo::X86SelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(X86GenSDNodeInfo) {}
+
+const char *X86SelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define NODE_NAME_CASE(NODE) \
+ case X86ISD::NODE: \
+ return "X86ISD::" #NODE;
+
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<X86ISD::NodeType>(Opcode)) {
+ NODE_NAME_CASE(POP_FROM_X87_REG)
+ NODE_NAME_CASE(GlobalBaseReg)
+ NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
+ NODE_NAME_CASE(PCMPESTR)
+ NODE_NAME_CASE(PCMPISTR)
+ NODE_NAME_CASE(MGATHER)
+ NODE_NAME_CASE(MSCATTER)
+ NODE_NAME_CASE(AESENCWIDE128KL)
+ NODE_NAME_CASE(AESDECWIDE128KL)
+ NODE_NAME_CASE(AESENCWIDE256KL)
+ NODE_NAME_CASE(AESDECWIDE256KL)
+ }
+#undef NODE_NAME_CASE
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
- Opcode <= X86ISD::LAST_MEMORY_OPCODE;
+ // These nodes don't have corresponding entries in *.td files yet.
+ if (Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
+ Opcode <= X86ISD::LAST_MEMORY_OPCODE)
+ return true;
+
+ return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
}
-bool X86SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
- return Opcode >= X86ISD::FIRST_STRICTFP_OPCODE &&
- Opcode <= X86ISD::LAST_STRICTFP_OPCODE;
+void X86SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case X86ISD::VP2INTERSECT:
+ // invalid number of results; expected 1, got 2
+ case X86ISD::VTRUNCSTOREUS:
+ case X86ISD::VTRUNCSTORES:
+ case X86ISD::FSETCCM_SAE:
+ // invalid number of operands; expected 3, got 4
+ case X86ISD::CVTPH2PS:
+ case X86ISD::CVTTP2SI_SAE:
+ case X86ISD::CVTTP2UI_SAE:
+ case X86ISD::CVTTP2IBS_SAE:
+ // invalid number of operands; expected 1, got 2
+ case X86ISD::CMPMM_SAE:
+ // invalid number of operands; expected 4, got 5
+ case X86ISD::CMPMM:
+ case X86ISD::FSETCCM:
+ // operand #2 must have type i8, but has type i32
+ case X86ISD::CALL:
+ case X86ISD::NT_BRIND:
+ // operand #1 must have type i32 (iPTR), but has type i64
+ case X86ISD::ADD:
+ // result #1 must have type i32, but has type i128
+ case X86ISD::INSERTQI:
+ case X86ISD::EXTRQI:
+ // result #0 must have type v2i64, but has type v16i8/v8i16
+ case X86ISD::CMPCCXADD:
+ // operand #4 must have type i8, but has type i32
+ return;
+ }
+
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
/// Returns the best type to use with repmovs/repstos depending on alignment.
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
index e77e16bab830d..19c5986982614 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -15,20 +15,68 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "X86GenSDNodeInfo.inc"
+
namespace llvm {
+namespace X86ISD {
+
+enum NodeType : unsigned {
+ /// The same as ISD::CopyFromReg except that this node makes it explicit
+ /// that it may lower to an x87 FPU stack pop. Optimizations should be more
+ /// cautious when handling this node than a normal CopyFromReg to avoid
+ /// removing a required FPU stack pop. A key requirement is optimizations
+ /// should not optimize any users of a chain that contains a
+ /// POP_FROM_X87_REG to use a chain from a point earlier than the
+ /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
+ POP_FROM_X87_REG = X86ISD::GENERATED_OPCODE_END,
+
+ /// On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ // SSE42 string comparisons.
+ // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
+ // will emit one or two instructions based on which results are used. If
+ // flags and index/mask this allows us to use a single instruction since
+ // we won't have to pick and opcode for flags. Instead we can rely on the
+ // DAG to CSE everything and decide at isel.
+ PCMPISTR,
+ PCMPESTR,
+
+ // Compare and swap.
+ FIRST_MEMORY_OPCODE,
+ LCMPXCHG16_SAVE_RBX_DAG = FIRST_MEMORY_OPCODE,
-class X86SelectionDAGInfo : public SelectionDAGTargetInfo {
+ // X86 specific gather and scatter
+ MGATHER,
+ MSCATTER,
+
+ // Key locker nodes that produce flags.
+ AESENCWIDE128KL,
+ AESDECWIDE128KL,
+ AESENCWIDE256KL,
+ AESDECWIDE256KL,
+ LAST_MEMORY_OPCODE = AESDECWIDE256KL,
+};
+
+} // namespace X86ISD
+
+class X86SelectionDAGInfo : public SelectionDAGGenTargetInfo {
/// Returns true if it is possible for the base register to conflict with the
/// given set of clobbers for a memory intrinsic.
bool isBaseRegConflictPossible(SelectionDAG &DAG,
ArrayRef<MCPhysReg> ClobberSet) const;
public:
- explicit X86SelectionDAGInfo() = default;
+ X86SelectionDAGInfo();
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
bool isTargetMemoryOpcode(unsigned Opcode) const override;
- bool isTargetStrictFPOpcode(unsigned Opcode) const override;
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
@@ -44,6 +92,6 @@ class X86SelectionDAGInfo : public SelectionDAGTargetInfo {
MachinePointerInfo SrcPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
>From 8f06fa392ab8a0b3c637d3c7589254863d8282be Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Thu, 12 Dec 2024 01:59:14 +0300
Subject: [PATCH 2/3] Generic bugs
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +--
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 +-
3 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c2b4c19846316..cd0137532aaa5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1955,7 +1955,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain,
const SDLoc &DL) {
EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout());
- return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain,
+ return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Other, Chain,
getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true));
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4f13f3b128ea4..42f308f77f76a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3186,8 +3186,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
Guard, GuardVal, ISD::SETNE);
// If the guard/stackslot do not equal, branch to failure MBB.
- SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, GuardVal.getOperand(0),
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
SDValue Br = DAG.getNode(ISD::BR, dl,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 5bed32db528d6..7fc14d592d21e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2717,7 +2717,7 @@ getSimpleVT(const unsigned char *MatcherTable, unsigned &MatcherIndex) {
void SelectionDAGISel::Select_JUMP_TABLE_DEBUG_INFO(SDNode *N) {
SDLoc dl(N);
- CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Glue,
+ CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Other,
CurDAG->getTargetConstant(N->getConstantOperandVal(1),
dl, MVT::i64, true));
}
>From 08a591b3e03748ffb1b0201402dd72e858ea7298 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Mon, 17 Nov 2025 21:26:05 +0300
Subject: [PATCH 3/3] Move comments
---
llvm/lib/Target/X86/X86InstrCompiler.td | 3 +-
llvm/lib/Target/X86/X86InstrFragments.td | 155 ++++++++-
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 335 +++++++++++++++----
llvm/lib/Target/X86/X86InstrMMX.td | 3 +
llvm/lib/Target/X86/X86InstrRAOINT.td | 2 +
llvm/lib/Target/X86/X86InstrSSE.td | 2 +
llvm/lib/Target/X86/X86InstrTSX.td | 1 +
7 files changed, 436 insertions(+), 65 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index ec31675731b79..3a707c8d0ae99 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -870,7 +870,8 @@ let Predicates = [UseIncDec, In64BitMode] in {
def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
}
-// Atomic bit test.
+/// LOCK-prefixed atomic bit-test read-modify-write instructions.
+/// EFLAGS, OUTCHAIN = LBTS/LBTC/LBTR(INCHAIN, PTR, BITOFF, SIZE)
def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
SDTCisVT<2, i8>, SDTCisVT<3, i32>]>;
def x86bts : SDNode<"X86ISD::LBTS", X86LBTest,
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index fcf791b1a1f0f..254509c191979 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -131,47 +131,82 @@ def SDTX86Cmpccxadd : SDTypeProfile<1, 4, [SDTCisSameAs<0, 2>,
SDTCisPtrTy<1>, SDTCisSameAs<2, 3>,
SDTCisVT<4, i8>]>;
+// Memory barriers.
def X86MFence : SDNode<"X86ISD::MFENCE", SDTNone, [SDNPHasChain]>;
-
+// Bit scan forward.
def X86bsf : SDNode<"X86ISD::BSF", SDTBinaryArithWithFlags>;
+
+// Bit scan reverse.
def X86bsr : SDNode<"X86ISD::BSR", SDTBinaryArithWithFlags>;
+
+// X86 funnel/double shift i16 instructions. These correspond to
+// X86::SHLDW and X86::SHRDW instructions which have different amt
+// modulo rules to generic funnel shifts.
+// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
def X86fshl : SDNode<"X86ISD::FSHL", SDTIntShiftDOp>;
def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>;
+// X86 compare and logical compare instructions.
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>;
let IsStrictFP = true in {
+ /// X86 strict FP compare instructions.
def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
}
+// X86 bit-test instructions.
def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+// Conditional compare instructions
def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>;
def X86ctest : SDNode<"X86ISD::CTEST", SDTX86Ccmp>;
+// Conditional load/store instructions
def X86cload : SDNode<"X86ISD::CLOAD", SDTX86Cload, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86cstore : SDNode<"X86ISD::CSTORE", SDTX86Cstore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+// X86 conditional moves. Operand 0 and operand 1 are the two values
+// to select from. Operand 2 is the condition code, and operand 3 is the
+// flag operand produced by a CMP or TEST instruction.
def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
+
+// X86 conditional branches. Operand 0 is the chain operand, operand 1
+// is the block to branch if condition is true, operand 2 is the
+// condition code, and operand 3 is the flag operand produced by a CMP
+// or TEST instruction.
def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
[SDNPHasChain]>;
+
+// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+// operand, usually produced by a CMP instruction.
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+
+// Same as SETCC except it's materialized with a sbb and the value is all
+// one's or all zero's.
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
+// Get a random integer and indicate whether it is valid in CF.
def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;
+// Get a NIST SP800-90B & C compliant random integer and
+// indicate whether it is valid in CF.
def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;
+// Protection keys
+// RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
+// WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
+// value for ECX.
def X86rdpkru : SDNode<"X86ISD::RDPKRU", SDTX86rdpkru,
[SDNPHasChain, SDNPSideEffect]>;
def X86wrpkru : SDNode<"X86ISD::WRPKRU", SDTX86wrpkru,
[SDNPHasChain, SDNPSideEffect]>;
+// Compare and swap.
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -182,15 +217,24 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86cas16pair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
+// Return with a glue operand. Operand 0 is the chain operand, operand
+// 1 is the number of bytes of stack to pop.
def X86retglue : SDNode<"X86ISD::RET_GLUE", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// Return from interrupt. Operand 0 is the number of bytes to pop.
def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue]>;
+// Save xmm argument registers to the stack, according to %al. An operator
+// is needed so that this can be expanded with control flow.
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
SDT_X86VASTART_SAVE_XMM_REGS,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>;
+
+// These instructions grab the address of the next argument
+// from a va_list. (reads and modifies the va_list in memory)
def X86vaarg64 :
SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
@@ -199,6 +243,7 @@ def X86vaargx32 :
SDNode<"X86ISD::VAARG_X32", SDT_X86VAARG,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
+
def X86callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -206,63 +251,111 @@ def X86callseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+// These operations represent an abstract X86 call
+// instruction, which includes a bunch of information. In particular the
+// operands of these node are:
+//
+// #0 - The incoming token chain
+// #1 - The callee
+// #2 - The number of arg bytes the caller pushes on the stack.
+// #3 - The number of arg bytes the callee pops off the stack.
+// #4 - The value to pass in AL/AX/EAX (optional)
+// #5 - The value to pass in DL/DX/EDX (optional)
+//
+// The result values of these nodes are:
+//
+// #0 - The outgoing token chain
+// #1 - The first register result value (optional)
+// #2 - The second register result value (optional)
+//
def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+// Pseudo for an ObjC call that gets emitted together with a special
+// marker instruction.
def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+// Pseudo for a call to an imported function to ensure the correct machine
+// instruction is emitted for Import Call Optimization.
def X86imp_call : SDNode<"X86ISD::IMP_CALL", SDT_X86Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+// Same as call except it adds the NoTrack prefix.
def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+
+// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
+// operand 1 is the target address.
def X86NoTrackBrind : SDNode<"X86ISD::NT_BRIND", SDT_X86NtBrind,
[SDNPHasChain]>;
+// Repeat fill, corresponds to X86::REP_STOSx.
def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
+
+// Repeat move, corresponds to X86::REP_MOVSx.
def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad]>;
+// A wrapper node for TargetConstantPool, TargetJumpTable,
+// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
+// MCSymbol and TargetBlockAddress.
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+
+// Special wrapper used under X86-64 PIC mode for RIP
+// relative displacements.
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
+// Thread Local Storage.
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+// Thread Local Storage. A call to get the start address
+// of the TLS block for the current module.
def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+// Thread Local Storage. A descriptor containing pointer to
+// code and to argument to get the TLS offset for the symbol.
def X86tlsdesc : SDNode<"X86ISD::TLSDESC", SDT_X86TLSADDR,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+// Exception Handling helpers.
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
+// SjLj exception handling setjmp.
def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP",
SDTypeProfile<1, 1, [SDTCisInt<0>,
SDTCisPtrTy<1>]>,
[SDNPHasChain, SDNPSideEffect]>;
+
+// SjLj exception handling longjmp.
def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPSideEffect]>;
+
+// SjLj exception handling dispatch.
def X86eh_sjlj_setup_dispatch : SDNode<"X86ISD::EH_SJLJ_SETUP_DISPATCH",
SDTypeProfile<0, 0, []>,
[SDNPHasChain, SDNPSideEffect]>;
+// Tail call return. See X86TargetLowering::LowerCall for
+// the list of operands.
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+// Arithmetic operations with FLAGS results.
def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
@@ -280,6 +373,8 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
+/// LOCK-prefixed arithmetic read-modify-write instructions.
+/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
def X86lock_add : SDNode<"X86ISD::LADD", SDTLockBinaryArithWithFlags,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
@@ -296,33 +391,47 @@ def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
+// Bit field extract.
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>;
+// Zero High Bits Starting with Specified Bit Position.
def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
+// Parallel extract and deposit.
def X86pdep : SDNode<"X86ISD::PDEP", SDTIntBinOp>;
def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>;
+// X86-specific multiply by immediate.
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+// A stack checking function call. On Windows it's _chkstk call.
def X86DynAlloca : SDNode<"X86ISD::DYN_ALLOCA", SDT_X86DYN_ALLOCA,
[SDNPHasChain, SDNPOutGlue]>;
+// For allocating variable amounts of stack space when using
+// segmented stacks. Checks if the current stacklet has enough space, and
+// falls back to heap allocation if not.
def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
[SDNPHasChain]>;
+// For allocating stack space when using stack clash protector.
+// Allocation is performed by block, and each block is probed.
def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
[SDNPHasChain]>;
+// Thread Local Storage. When calling to an OS provided
+// thunk at the address from an earlier relocation.
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+// LWP insert record.
def X86lwpins : SDNode<"X86ISD::LWPINS",
SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>;
+// User level wait
def X86umwait : SDNode<"X86ISD::UMWAIT",
SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
@@ -333,14 +442,18 @@ def X86tpause : SDNode<"X86ISD::TPAUSE",
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;
+// Enqueue Stores Instructions
def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD,
[SDNPHasChain, SDNPSideEffect]>;
def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD,
[SDNPHasChain, SDNPSideEffect]>;
+
+// User level interrupts - testui
def X86testui : SDNode<"X86ISD::TESTUI",
SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;
+// Key locker nodes that produce flags.
def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL,
[SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
SDNPMemOperand]>;
@@ -354,6 +467,10 @@ def X86aesdec256kl : SDNode<"X86ISD::AESDEC256KL", SDT_X86AESENCDECKL,
[SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
SDNPMemOperand]>;
+// Compare and Add if Condition is Met. Compare value in operand 2 with
+// value in memory of operand 1. If condition of operand 4 is met, add
+// value operand 3 to m32 and write new value in operand 1. Operand 2 is
+// always updated with the original value from operand 1.
def X86cmpccxadd : SDNode<"X86ISD::CMPCCXADD", SDTX86Cmpccxadd,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
@@ -793,8 +910,10 @@ def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+// Perform an FP80 add after changing precision control in FPCW.
def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
+// Perform an FP80 add after changing precision control in FPCW.
let IsStrictFP = true in
def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
[SDNPHasChain,SDNPCommutative]>;
@@ -803,25 +922,59 @@ def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs),
[(X86strict_fp80_add node:$lhs, node:$rhs),
(X86fp80_add node:$lhs, node:$rhs)]>;
+// This instruction implements an extending load to FP stack slots.
+// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+// operand, and ptr to load from. The memory VT specifies the type to
+// load from.
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+// This instruction implements a truncating store from FP stack
+// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+// chain operand, value to store, address, and glue. The memory VT
+// specifies the type to store as.
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+// This instruction implements SINT_TO_FP with the
+// integer source in memory and FP reg result. This corresponds to the
+// X86::FILD*m instructions. It has two inputs (token chain and address)
+// and two outputs (FP value and token chain). The integer source type is
+// specified by the memory VT.
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+// This instruction implements a fp->int store from FP stack
+// slots. This corresponds to the fist instruction. It takes a
+// chain operand, value to store, address, and glue. The memory VT
+// specifies the type to store as.
def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+// This instruction implements FP_TO_SINT with the
+// integer destination in memory and a FP reg source. This corresponds
+// to the X86::FIST*m instructions and the rounding mode change stuff. It
+// has two inputs (token chain and address) and two outputs (int value
+// and token chain). Memory VT specifies the type to store to.
def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+// Store FP control word into i16 memory.
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
[SDNPHasChain, SDNPMayStore, SDNPSideEffect,
SDNPMemOperand]>;
+
+// Load FP control word from i16 memory.
def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m", SDTX86CwdLoad,
[SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
SDNPMemOperand]>;
+
+// Store x87 FPU environment into memory.
def X86fpenv_get : SDNode<"X86ISD::FNSTENVm", SDTX86FPEnv,
[SDNPHasChain, SDNPMayStore, SDNPSideEffect,
SDNPMemOperand]>;
+
+// Load x87 FPU environment from memory.
def X86fpenv_set : SDNode<"X86ISD::FLDENVm", SDTX86FPEnv,
[SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
SDNPMemOperand]>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 35c4d89a1b231..b4b0a37cdd228 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -14,10 +14,13 @@
// MMX specific DAG Nodes.
//===----------------------------------------------------------------------===//
-// Low word of MMX to GPR.
+// Copies a 32-bit value from the low word of a MMX
+// vector to a GPR.
def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
[SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
-// GPR to low word of MMX.
+
+// Copies a GPR into the low 32-bit word of a MMX vector
+// and zeros out the high word.
def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
@@ -35,8 +38,11 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
+/// Floating point max and min.
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+
+// Scalar intrinsic floating point max and min.
def X86fmins : SDNode<"X86ISD::FMINS", SDTFPBinOp>;
def X86fmaxs : SDNode<"X86ISD::FMAXS", SDTFPBinOp>;
@@ -47,6 +53,7 @@ def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
let IsStrictFP = true in {
+ /// Floating point max and min.
def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp,
[SDNPHasChain]>;
def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp,
@@ -60,70 +67,128 @@ def X86any_fmax : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_fmax node:$src1, node:$src2),
(X86fmax node:$src1, node:$src2)]>;
+// Bitwise logical AND of floating point values. This corresponds
+// to X86::ANDPS or X86::ANDPD.
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
+
+// Bitwise logical OR of floating point values. This corresponds
+// to X86::ORPS or X86::ORPD.
def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
+
+// Bitwise logical XOR of floating point values. This corresponds
+// to X86::XORPS or X86::XORPD.
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
+
+// Bitwise logical ANDNOT of floating point values. This
+// corresponds to X86::ANDNPS or X86::ANDNPD.
def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>;
+
+// Floating point reciprocal-sqrt and reciprocal approximation.
+// Note that these typically require refinement
+// in order to obtain suitable precision.
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+
+// Floating point horizontal add/sub.
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
+
+// Integer horizontal add/sub.
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
+
+// X86 compare and logical compare instructions.
def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;
+
+// X86 compare with Intrinsics similar to COMI.
def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>;
def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>;
+
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+
+// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
+// Operands are two FP values to compare; result is a mask of
+// 0s or 1s. Generally DTRT for C/C++ with NaNs.
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
+// Shuffle 16 8-bit values within a vector.
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
+
+// Compute Sum of Absolute Differences.
def X86psadbw : SDNode<"X86ISD::PSADBW",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
SDTCVecEltisVT<1, i8>,
SDTCisSameSizeAs<0,1>,
SDTCisSameAs<1,2>]>, [SDNPCommutative]>;
+
def SDTX86PSADBW : SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, i8>,
SDTCisSameSizeAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
+
+// Compute Double Block Packed Sum-Absolute-Differences
def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", SDTX86PSADBW>;
+
+// Bitwise Logical AND NOT of Packed FP values.
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
+
def X86multishift : SDNode<"X86ISD::MULTISHIFT",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<1,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>,
SDTCisVT<2, i8>]>>;
+
+// Extract a 16-bit value from a vector and zero extend it to
+// i32, corresponds to X86::PEXTRW.
def X86pextrw : SDNode<"X86ISD::PEXTRW",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>,
SDTCisVT<2, i8>]>>;
+
+// Insert the lower 8-bits of a 32-bit value to a vector,
+// corresponds to X86::PINSRB.
def X86pinsrb : SDNode<"X86ISD::PINSRB",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisVT<3, i8>]>>;
+
+// Insert the lower 16-bits of a 32-bit value to a vector,
+// corresponds to X86::PINSRW.
def X86pinsrw : SDNode<"X86ISD::PINSRW",
SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisVT<3, i8>]>>;
+
+// Insert any element of a 4 x float vector into any element
+// of a destination 4 x float vector.
def X86insertps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
+
+// Vector move to low scalar and zero higher vector elements.
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+// Load, scalar_to_vector, and zero extend.
def X86vzld : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+// extract_vector_elt, store.
def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+// scalar broadcast from memory.
def X86VBroadcastld : SDNode<"X86ISD::VBROADCAST_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+// subvector broadcast from memory.
def X86SubVBroadcastld : SDNode<"X86ISD::SUBV_BROADCAST_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -137,17 +202,31 @@ def SDTVmtrunc : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>;
+// Vector integer truncate.
def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>;
+
+// Vector integer truncate with unsigned/signed saturation.
def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>;
def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
+
+// Masked version of the above. Used when less than a 128-bit result is
+// produced since the mask only applies to the lower elements and can't
+// be represented by a select.
+// SRC, PASSTHRU, MASK
def X86vmtrunc : SDNode<"X86ISD::VMTRUNC", SDTVmtrunc>;
def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
+// Vector FP extend.
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>]>>;
+def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE",
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
+ SDTCisOpSmallerThanOp<1, 0>]>>;
+// Vector FP extend.
let IsStrictFP = true in
def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
@@ -158,16 +237,20 @@ def X86any_vfpext : PatFrags<(ops node:$src),
[(X86strict_vfpext node:$src),
(X86vfpext node:$src)]>;
+// Vector FP round.
def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>]>>;
+
+// Convert TWO packed single data to one packed data
def X86vfpround2 : SDNode<"X86ISD::VFPROUND2",
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>,
SDTCisSameAs<1, 2>,
SDTCisOpSmallerThanOp<0, 1>]>>;
+// Vector FP round.
let IsStrictFP = true in
def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
@@ -192,6 +275,7 @@ def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND",
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
+// Vector FP extend.
def X86fpexts : SDNode<"X86ISD::VFPEXTS",
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@@ -203,6 +287,8 @@ def X86fpextsSAE : SDNode<"X86ISD::VFPEXTS_SAE",
SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
+// Masked version of VFPROUND. Used for v2f64->v4f32.
+// SRC, PASSTHRU, MASK
def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>,
@@ -213,13 +299,18 @@ def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVT<2, i8>, SDTCisInt<0>]>;
+// 128-bit vector logical left / right shift
def X86vshldq : SDNode<"X86ISD::VSHLDQ", X86vshiftimm>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", X86vshiftimm>;
+
+// Vector integer comparisons.
def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
+// Vector packed double/float comparison.
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
+// Vector packed double/float comparison.
let IsStrictFP = true in
def X86strict_cmpp : SDNode<"X86ISD::STRICT_CMPP", SDTX86VFCMP, [SDNPHasChain]>;
@@ -240,19 +331,31 @@ def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
+// Vector comparison generating mask bits for fp and
+// integer signed and unsigned data types.
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+
+// Vector mask comparison generating mask bits for FP values.
def X86cmpmm : SDNode<"X86ISD::CMPMM", X86MaskCmpMaskCC>;
+// Vector comparison generating mask bits for fp and
+// integer signed and unsigned data types.
let IsStrictFP = true in
def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>;
def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_cmpm node:$src1, node:$src2, node:$src3),
(X86cmpm node:$src1, node:$src2, node:$src3)]>;
+
+// Vector mask comparison with SAE for FP values.
def X86cmpmmSAE : SDNode<"X86ISD::CMPMM_SAE", X86MaskCmpMaskCC>;
+
+// X86 FP SETCC, similar to FSETCC, but with output as an i1 mask and
+// a version with SAE.
def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE", X86CmpMaskCCScalar>;
+// v8i16 Horizontal minimum and position.
def X86phminpos: SDNode<"X86ISD::PHMINPOS",
SDTypeProfile<1, 1, [SDTCisVT<0, v8i16>, SDTCisVT<1, v8i16>]>>;
@@ -260,6 +363,7 @@ def X86vshiftuniform : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>, SDTCisInt<0>,
SDTCisInt<2>]>;
+// Vector shift elements
def X86vshl : SDNode<"X86ISD::VSHL", X86vshiftuniform>;
def X86vsrl : SDNode<"X86ISD::VSRL", X86vshiftuniform>;
def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftuniform>;
@@ -267,14 +371,17 @@ def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftuniform>;
def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<0>]>;
+// Vector variable shift
def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>;
def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>;
def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>;
+// Vector shift elements by immediate
def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>;
def X86vsrli : SDNode<"X86ISD::VSRLI", X86vshiftimm>;
def X86vsrai : SDNode<"X86ISD::VSRAI", X86vshiftimm>;
+// Shifts of mask registers.
def X86kshiftl : SDNode<"X86ISD::KSHIFTL",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
SDTCisSameAs<0, 1>,
@@ -284,14 +391,18 @@ def X86kshiftr : SDNode<"X86ISD::KSHIFTR",
SDTCisSameAs<0, 1>,
SDTCisVT<2, i8>]>>;
+// ADD for masks.
def X86kadd : SDNode<"X86ISD::KADD", SDTIntBinOp, [SDNPCommutative]>;
+// Bit rotate by immediate
def X86vrotli : SDNode<"X86ISD::VROTLI", X86vshiftimm>;
def X86vrotri : SDNode<"X86ISD::VROTRI", X86vshiftimm>;
+// XOP arithmetic/logical shifts.
def X86vpshl : SDNode<"X86ISD::VPSHL", X86vshiftvariable>;
def X86vpsha : SDNode<"X86ISD::VPSHA", X86vshiftvariable>;
+// XOP signed/unsigned integer comparisons.
def X86vpcom : SDNode<"X86ISD::VPCOM",
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
@@ -300,6 +411,8 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU",
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisVT<3, i8>, SDTCisInt<0>]>>;
+
+// XOP two source permutation.
def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
@@ -307,6 +420,8 @@ def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
SDTCisSameNumEltsAs<0, 3>,
SDTCisSameSizeAs<0,3>,
SDTCisVT<4, i8>]>>;
+
+// XOP packed permute bytes.
def X86vpperm : SDNode<"X86ISD::VPPERM",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0, 3>]>>;
@@ -315,31 +430,44 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
+// Vector Multiply Packed Unsigned Integers with Round and Scale.
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
+
+// Vector bitwise comparisons.
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+
+// Vector packed fp sign bitwise comparisons.
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+// OR/AND test for masks.
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
+// Vector sign bit extraction.
def X86movmsk : SDNode<"X86ISD::MOVMSK",
SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>;
+// X86 Select
def X86selects : SDNode<"X86ISD::SELECTS",
SDTypeProfile<1, 3, [SDTCisVT<1, v1i1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<2, 3>]>>;
+// Vector multiply packed unsigned doubleword integers.
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>]>,
[SDNPCommutative]>;
+
+// Vector multiply packed signed doubleword integers.
def X86pmuldq : SDNode<"X86ISD::PMULDQ",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>]>,
[SDNPCommutative]>;
+/// SSE4A Extraction and Insertion.
def X86extrqi : SDNode<"X86ISD::EXTRQI",
SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>;
@@ -404,19 +532,25 @@ def SDTFPToxIntSatOp
: SDTypeProfile<1,
1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1>]>;
+// Custom handling for FP_TO_xINT_SAT
def X86fp2sisat : SDNode<"X86ISD::FP_TO_SINT_SAT", SDTFPToxIntSatOp>;
def X86fp2uisat : SDNode<"X86ISD::FP_TO_UINT_SAT", SDTFPToxIntSatOp>;
+// Intra-lane alignr.
def X86PAlignr : SDNode<"X86ISD::PALIGNR",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i8>,
SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisVT<3, i8>]>>;
+
+// AVX512 inter-lane alignr.
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
+// VBMI2 Concat & Shift.
def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;
+// Detect Conflicts Within a Vector
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
@@ -424,6 +558,8 @@ def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
+
+// Shuffle Packed Values at 128-bit granularity.
def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>;
def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
@@ -458,12 +594,16 @@ def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
SDTCisSameSizeAs<0,1>,
SDTCisSameAs<1,2>,
SDTCisOpSmallerThanOp<0, 1>]>;
+
+// Several flavors of instructions with vector shuffle behaviors.
+// Saturated signed/unsigned packing.
def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
+// Multiply and Add Packed Integers.
def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, i8>,
@@ -478,12 +618,19 @@ def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD",
def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
+
+// Variable Permute (VPERM).
+// Res = VPERMV MaskV, V0
def X86VPermv : SDNode<"X86ISD::VPERMV",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>,
SDTCisSameNumEltsAs<0,1>,
SDTCisSameSizeAs<0,1>,
SDTCisSameAs<0,2>]>>;
+
def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
+
+// 3-op Variable Permute (VPERMT2).
+// Res = VPERMV3 V0, MaskV, V1
def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>,
@@ -491,20 +638,40 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
SDTCisSameSizeAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
+// Bitwise ternary logic.
def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
+// Fix Up Special Packed Float32/64 values.
def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImm>;
def X86VFixupimmSAE : SDNode<"X86ISD::VFIXUPIMM_SAE", SDTFPTernaryOpImm>;
def X86VFixupimms : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImm>;
def X86VFixupimmSAEs : SDNode<"X86ISD::VFIXUPIMMS_SAE", SDTFPTernaryOpImm>;
+
+// Range Restriction Calculation For Packed Pairs of Float32/64 values.
def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImm>;
def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>;
+def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>;
+def X86RangesSAE : SDNode<"X86ISD::VRANGES_SAE", SDTFPBinOpImm>;
+
+// Reduce - Perform Reduction Transformation on scalar/packed FP.
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>;
+def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>;
+def X86ReducesSAE : SDNode<"X86ISD::VREDUCES_SAE", SDTFPBinOpImm>;
+
+// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
+// Also used by the legacy (V)ROUND intrinsics where we mask out the
+// scaling part of the immediate.
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
+def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;
+def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>;
+def X86RndScalesSAE: SDNode<"X86ISD::VRNDSCALES_SAE",SDTFPBinOpImm>;
+// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
+// Also used by the legacy (V)ROUND intrinsics where we mask out the
+// scaling part of the immediate.
let IsStrictFP = true in
def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm,
[SDNPHasChain]>;
@@ -513,22 +680,39 @@ def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_VRndScale node:$src1, node:$src2),
(X86VRndScale node:$src1, node:$src2)]>;
-def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;
+// Extract Normalized Mantissas.
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>;
def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE", SDTFPUnaryOpImm>;
+def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImm>;
+def X86GetMantsSAE : SDNode<"X86ISD::VGETMANTS_SAE", SDTFPBinOpImm>;
+
+// Tests Types Of a FP Values for packed types.
def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
SDTCisFP<1>,
SDTCisSameNumEltsAs<0,1>,
SDTCisVT<2, i32>]>, []>;
+
+// Tests Types Of a FP Values for scalar types.
def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS",
SDTypeProfile<1, 2, [SDTCisVT<0, v1i1>,
SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>;
+// Broadcast (splat) scalar or element 0 of a vector. If the operand is
+// a vector, this node may change the vector length as part of the splat.
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+
+// Broadcast mask to vector.
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
+// Blend where the selector is an immediate.
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
+
+// Dynamic (non-constant condition) vector blend where only the sign bits
+// of the condition elements are used. This is used to enforce that the
+// condition mask is not valid for generic VSELECT optimizations. This
+// is also used to implement the intrinsics.
+// Operands are in VSELECT order: MASK, TRUE, FALSE
def X86Blendv : SDNode<"X86ISD::BLENDV",
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
SDTCisSameAs<0, 2>,
@@ -536,8 +720,10 @@ def X86Blendv : SDNode<"X86ISD::BLENDV",
SDTCisSameNumEltsAs<0, 1>,
SDTCisSameSizeAs<0, 1>]>>;
+// Combined add and sub on an FP vector.
def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
+// FP vector ops with rounding mode.
def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
def X86fadds : SDNode<"X86ISD::FADDS", SDTFPBinOp>;
def X86faddRnds : SDNode<"X86ISD::FADDS_RND", SDTFPBinOpRound>;
@@ -554,45 +740,52 @@ def X86fmaxSAE : SDNode<"X86ISD::FMAX_SAE", SDTFPBinOp>;
def X86fmaxSAEs : SDNode<"X86ISD::FMAXS_SAE", SDTFPBinOp>;
def X86fminSAE : SDNode<"X86ISD::FMIN_SAE", SDTFPBinOp>;
def X86fminSAEs : SDNode<"X86ISD::FMINS_SAE", SDTFPBinOp>;
+
+// FP Scale.
def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOp>;
def X86scalefRnd : SDNode<"X86ISD::SCALEF_RND", SDTFPBinOpRound>;
def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOp>;
def X86scalefsRnd: SDNode<"X86ISD::SCALEFS_RND", SDTFPBinOpRound>;
+
+// FP vector ops with rounding mode.
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
def X86fsqrts : SDNode<"X86ISD::FSQRTS", SDTFPBinOp>;
def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>;
+
+// FP vector get exponent.
def X86fgetexp : SDNode<"X86ISD::FGETEXP", SDTFPUnaryOp>;
def X86fgetexpSAE : SDNode<"X86ISD::FGETEXP_SAE", SDTFPUnaryOp>;
def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
+// FMA nodes.
+// We use the target independent ISD::FMA for the non-inverted case.
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp, [SDNPCommutative]>;
-let IsStrictFP = true in
-def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+let IsStrictFP = true in {
+ // Strict FMA nodes.
+ def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+ def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+ def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+}
def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fnmadd node:$src1, node:$src2, node:$src3),
(X86Fnmadd node:$src1, node:$src2, node:$src3)]>;
-def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
-
-let IsStrictFP = true in
-def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fmsub node:$src1, node:$src2, node:$src3),
(X86Fmsub node:$src1, node:$src2, node:$src3)]>;
-def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
-
-let IsStrictFP = true in
-def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fnmsub node:$src1, node:$src2, node:$src3),
(X86Fnmsub node:$src1, node:$src2, node:$src3)]>;
-def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp, [SDNPCommutative]>;
+// FMA with rounding mode.
def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound, [SDNPCommutative]>;
@@ -600,15 +793,21 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat
def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
+// For avx512-vp2intersect
def X86vp2intersect : SDNode<"X86ISD::VP2INTERSECT",
SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
SDTCisVec<1>, SDTCisSameAs<1, 2>]>>;
def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+
+// AVX512IFMA multiply and add.
+// NOTE: These are different than the instruction and perform
+// op0 x op1 + op2.
def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative]>;
+// AVX512-FP16 complex addition and multiplication.
def x86vfmaddc : SDNode<"X86ISD::VFMADDC", SDTFPTernaryOp, [SDNPCommutative]>;
def x86vfmaddcRnd : SDNode<"X86ISD::VFMADDC_RND", SDTFmaRound, [SDNPCommutative]>;
def x86vfcmaddc : SDNode<"X86ISD::VFCMADDC", SDTFPTernaryOp>;
@@ -627,28 +826,23 @@ def x86vfcmaddcShRnd : SDNode<"X86ISD::VFCMADDCSH_RND",SDTFmaRound>;
def x86vfmulcShRnd : SDNode<"X86ISD::VFMULCSH_RND", SDTFPBinOpRound, [SDNPCommutative]>;
def x86vfcmulcShRnd : SDNode<"X86ISD::VFCMULCSH_RND", SDTFPBinOpRound>;
+// AVX-512 reciprocal approximations with a little more precision.
def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>;
+def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>;
+def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
// VNNI
def SDTVnni : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+
+// VNNI
def X86Vpdpbusd : SDNode<"X86ISD::VPDPBUSD", SDTVnni>;
def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>;
def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>;
def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>;
-def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
-def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
-def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>;
-def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>;
-def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>;
-def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImm>;
-def X86RangesSAE : SDNode<"X86ISD::VRANGES_SAE", SDTFPBinOpImm>;
-def X86RndScalesSAE : SDNode<"X86ISD::VRNDSCALES_SAE", SDTFPBinOpImm>;
-def X86ReducesSAE : SDNode<"X86ISD::VREDUCES_SAE", SDTFPBinOpImm>;
-def X86GetMantsSAE : SDNode<"X86ISD::VGETMANTS_SAE", SDTFPBinOpImm>;
-
+// Compress and expand.
def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
[SDTCisSameAs<0, 1>, SDTCisVec<1>,
SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
@@ -658,6 +852,7 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<0, 3>]>, []>;
+// Bits shuffle
// vpshufbitqmb
def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
@@ -687,22 +882,25 @@ def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisInt<1>,
SDTCisVT<2, i32>]>;
-// Scalar
+// Convert Unsigned/Integer to Floating-Point Value with rounding mode.
def X86SintToFp : SDNode<"X86ISD::SCALAR_SINT_TO_FP", SDTintToFP>;
def X86SintToFpRnd : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND", SDTintToFPRound>;
def X86UintToFp : SDNode<"X86ISD::SCALAR_UINT_TO_FP", SDTintToFP>;
def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>;
+// Scalar float/double to signed/unsigned integer with truncation.
def X86cvtts2Int : SDNode<"X86ISD::CVTTS2SI", SDTSFloatToInt>;
def X86cvtts2UInt : SDNode<"X86ISD::CVTTS2UI", SDTSFloatToInt>;
def X86cvtts2IntSAE : SDNode<"X86ISD::CVTTS2SI_SAE", SDTSFloatToInt>;
def X86cvtts2UIntSAE : SDNode<"X86ISD::CVTTS2UI_SAE", SDTSFloatToInt>;
+// Scalar float/double to signed/unsigned integer.
def X86cvts2si : SDNode<"X86ISD::CVTS2SI", SDTSFloatToInt>;
def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>;
def X86cvts2siRnd : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
+// Scalar float/double to signed/unsigned integer with saturation.
def X86cvttss2Int : SDNode<"X86ISD::CVTTS2SIS", SDTSFloatToInt>;
def X86cvttss2UInt : SDNode<"X86ISD::CVTTS2UIS", SDTSFloatToInt>;
def X86cvttss2IntSAE : SDNode<"X86ISD::CVTTS2SIS_SAE", SDTSFloatToInt>;
@@ -710,29 +908,27 @@ def X86cvttss2UIntSAE : SDNode<"X86ISD::CVTTS2UIS_SAE", SDTSFloatToInt>;
// Vector with rounding mode
-// cvtt fp-to-int staff
-def X86cvttp2siSAE : SDNode<"X86ISD::CVTTP2SI_SAE", SDTFloatToInt>;
-def X86cvttp2uiSAE : SDNode<"X86ISD::CVTTP2UI_SAE", SDTFloatToInt>;
-
+// Convert Unsigned/Integer to Floating-Point Value with rounding mode.
def X86VSintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTVintToFPRound>;
def X86VUintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTVintToFPRound>;
+// Saturation enabled Vector float/double to signed/unsigned
+// integer with truncation.
def X86cvttp2sisSAE : SDNode<"X86ISD::CVTTP2SIS_SAE", SDTFloatToInt>;
def X86cvttp2uisSAE : SDNode<"X86ISD::CVTTP2UIS_SAE", SDTFloatToInt>;
def X86cvttp2sis : SDNode<"X86ISD::CVTTP2SIS", SDTFloatToInt>;
def X86cvttp2uis : SDNode<"X86ISD::CVTTP2UIS", SDTFloatToInt>;
-// cvt fp-to-int staff
-def X86cvtp2IntRnd : SDNode<"X86ISD::CVTP2SI_RND", SDTFloatToIntRnd>;
-def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
-
// Vector without rounding mode
-// cvtt fp-to-int staff
+// Vector float/double to signed/unsigned integer with truncation.
def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>;
def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>;
+def X86cvttp2siSAE : SDNode<"X86ISD::CVTTP2SI_SAE", SDTFloatToInt>;
+def X86cvttp2uiSAE : SDNode<"X86ISD::CVTTP2UI_SAE", SDTFloatToInt>;
let IsStrictFP = true in {
+ // Vector float/double to signed/unsigned integer with truncation.
def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>;
def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>;
}
@@ -744,10 +940,12 @@ def X86any_cvttp2ui : PatFrags<(ops node:$src),
[(X86strict_cvttp2ui node:$src),
(X86cvttp2ui node:$src)]>;
+// Vector signed/unsigned integer to float/double.
def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>;
def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>;
let IsStrictFP = true in {
+ // Vector signed/unsigned integer to float/double.
def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>;
def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>;
}
@@ -760,10 +958,11 @@ def X86any_VUintToFP : PatFrags<(ops node:$src),
(X86VUintToFP node:$src)]>;
-// cvt int-to-fp staff
+// Vector float/double to signed/unsigned integer.
def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
-
+def X86cvtp2IntRnd : SDNode<"X86ISD::CVTP2SI_RND", SDTFloatToIntRnd>;
+def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
// Masked versions of above
def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
@@ -781,55 +980,59 @@ def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
def X86VMSintToFP : SDNode<"X86ISD::MCVTSI2P", SDTMVintToFP>;
def X86VMUintToFP : SDNode<"X86ISD::MCVTUI2P", SDTMVintToFP>;
+// Masked versions of CVTP2[SU]I and CVTTP2[SU]I. Used for v2f64->v4f32.
+// SRC, PASSTHRU, MASK
def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>;
def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
+
+// Masked versions of CVTTP2[SU]IS. Used for v2f64 to v4i32.
+// SRC, PASSTHRU, MASK
def X86mcvttp2sis : SDNode<"X86ISD::MCVTTP2SIS", SDTMFloatToInt>;
def X86mcvttp2uis : SDNode<"X86ISD::MCVTTP2UIS", SDTMFloatToInt>;
def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>]>;
-def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>;
-
-let IsStrictFP = true in
-def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps,
- [SDNPHasChain]>;
-
-def X86any_cvtph2ps : PatFrags<(ops node:$src),
- [(X86strict_cvtph2ps node:$src),
- (X86cvtph2ps node:$src)]>;
-
-def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", SDTcvtph2ps>;
def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>]>;
+
+// Conversions between float and half-float.
def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>;
+def X86cvtps2phSAE : SDNode<"X86ISD::CVTPS2PH_SAE", SDTcvtps2ph>;
+def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>;
+def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", SDTcvtph2ps>;
-let IsStrictFP = true in
-def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph,
- [SDNPHasChain]>;
+let IsStrictFP = true in {
+ // Conversions between float and half-float.
+ def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph,
+ [SDNPHasChain]>;
+ def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps,
+ [SDNPHasChain]>;
+}
+
+def X86any_cvtph2ps : PatFrags<(ops node:$src),
+ [(X86strict_cvtph2ps node:$src),
+ (X86cvtph2ps node:$src)]>;
def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2),
[(X86strict_cvtps2ph node:$src1, node:$src2),
(X86cvtps2ph node:$src1, node:$src2)]>;
-def X86cvtps2phSAE : SDNode<"X86ISD::CVTPS2PH_SAE", SDTcvtps2ph>;
-
def SDTmcvtps2ph : SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>,
SDTCisSameAs<0, 3>,
SDTCVecEltisVT<4, i1>,
SDTCisSameNumEltsAs<1, 4>]>;
+
+// Masked version of CVTPS2PH.
+// SRC, RND, PASSTHRU, MASK
def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH", SDTmcvtps2ph>;
def X86mcvtps2phSAE : SDNode<"X86ISD::MCVTPS2PH_SAE", SDTmcvtps2ph>;
-def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
- SDTCisFP<1>, SDTCisVec<1>,
- SDTCisOpSmallerThanOp<1, 0>]>>;
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisFP<1>, SDTCisVec<1>,
@@ -846,16 +1049,23 @@ def X86vminmaxs : SDNode<"X86ISD::VMINMAXS", SDTypeProfile<1, 3, [SDTCisSameAs<0
def X86vminmaxsSae : SDNode<"X86ISD::VMINMAXS_SAE", SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>>;
-// cvt fp to bfloat16
+// Masked version of CVTNEPS2BF16.
+// SRC, PASSTHRU, MASK
def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, bf16>,
SDTCVecEltisVT<1, f32>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>>;
+
+// Vector float to bfloat16.
+// Convert packed single data to packed BF16 data
def X86cvtneps2bf16 : SDNode<"X86ISD::CVTNEPS2BF16",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, bf16>,
SDTCVecEltisVT<1, f32>]>>;
+
+// Dot product of BF16/FP16 pairs, accumulated into
+// packed single precision.
def X86dpbf16ps : SDNode<"X86ISD::DPBF16PS",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
SDTCisSameAs<0,1>,
@@ -867,7 +1077,7 @@ def X86dpfp16ps : SDNode<"X86ISD::DPFP16PS",
SDTCVecEltisVT<2, f16>,
SDTCisSameAs<2,3>]>>;
-// galois field arithmetic
+// Galois Field Arithmetic Instructions
def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>;
@@ -1368,15 +1578,15 @@ def masked_truncstorevi32 :
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
+// Vector truncating store with unsigned/signed saturation
def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+// Vector truncating masked store with unsigned/signed saturation
def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTX86MaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -1548,4 +1758,3 @@ def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
(X86vpmaddwd node:$lhs, node:$rhs), [{
return N->hasOneUse();
}]>;
-
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index 644d6d0b92dfc..1cdf7f9093c17 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -226,6 +226,9 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+// Copies a 64-bit value from an MMX vector to the low word
+// of an XMM vector, with the high word zero filled.
def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
[SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
diff --git a/llvm/lib/Target/X86/X86InstrRAOINT.td b/llvm/lib/Target/X86/X86InstrRAOINT.td
index b1a7fc6e901de..fd966dc7c1248 100644
--- a/llvm/lib/Target/X86/X86InstrRAOINT.td
+++ b/llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -16,6 +16,8 @@
def SDTRAOBinaryArith : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+// RAO arithmetic instructions.
+// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
def X86rao_add : SDNode<"X86ISD::AADD", SDTRAOBinaryArith,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86rao_or : SDNode<"X86ISD::AOR", SDTRAOBinaryArith,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 806b02b9f9359..4be9e26cffa79 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5265,6 +5265,8 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
let Predicates = [HasAVX, NoBWI] in
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, WIG;
+// Extract an 8-bit value from a vector and zero extend it to
+// i32, corresponds to X86::PEXTRB.
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
diff --git a/llvm/lib/Target/X86/X86InstrTSX.td b/llvm/lib/Target/X86/X86InstrTSX.td
index 57604b682d54e..63792e7666a2e 100644
--- a/llvm/lib/Target/X86/X86InstrTSX.td
+++ b/llvm/lib/Target/X86/X86InstrTSX.td
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
// TSX instructions
+// Test if in transactional execution.
def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;
More information about the llvm-commits
mailing list