[llvm] [AArch64] TableGen-erate SDNode descriptions (PR #140472)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Sun May 18 12:24:01 PDT 2025
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/140472
This is an attempt to continue @s-barannikov's work TableGen-erating SDNode descriptions. This takes the initial patch from #119709, moves the rest of the AArch64ISD nodes to TableGen, and fixes some issues found by the generated SDNode verification.
The main changes (alongside generating AArch64ISD nodes and verification) are:
The following (dead) AArch64ISD nodes (and associated code) are removed:
- INDEX_VECTOR
- SVE_LD2_MERGE_ZERO
- SVE_LD3_MERGE_ZERO
- SVE_LD4_MERGE_ZERO
No code path could emit any of these nodes.
Claims of having in/out glue were removed from the following nodes:
- `GLDFF1_*`
- `GLDFF1S_*`
None of these nodes were ever emitted with glue (so failed the generated verification).
The following nodes are now always emitted with a glue return value:
- `SMSTART`
- `SMSTOP`
- `COALESCER_BARRIER`
The glue return value was added inconsistently for these nodes (which again failed verification).
And finally, avoid using `LowerToPredicatedOp` for shuffle vector -> `REV*_MERGE_PASSTHRU` nodes. This could add an extra unused (and invalid) parameter to the `REV*_MERGE_PASSTHRU` node.
>From dfdb35324ff3d3d4c231d16db6e27bf76130dba2 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 01:00:33 +0300
Subject: [PATCH 1/4] AArch64
---
.../Target/AArch64/AArch64ISelLowering.cpp | 332 ------------
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 498 ------------------
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 12 +-
.../AArch64/AArch64SelectionDAGInfo.cpp | 93 +++-
.../Target/AArch64/AArch64SelectionDAGInfo.h | 89 +++-
llvm/lib/Target/AArch64/CMakeLists.txt | 1 +
llvm/unittests/CodeGen/CMakeLists.txt | 1 -
.../AArch64}/AArch64SelectionDAGTest.cpp | 31 +-
llvm/unittests/Target/AArch64/CMakeLists.txt | 2 +
9 files changed, 197 insertions(+), 862 deletions(-)
rename llvm/unittests/{CodeGen => Target/AArch64}/AArch64SelectionDAGTest.cpp (97%)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c7858e4106358..be39e7a849b82 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2710,332 +2710,6 @@ AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
return AArch64::createFastISel(funcInfo, libInfo);
}
-const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
-#define MAKE_CASE(V) \
- case V: \
- return #V;
- switch ((AArch64ISD::NodeType)Opcode) {
- case AArch64ISD::FIRST_NUMBER:
- break;
- MAKE_CASE(AArch64ISD::ALLOCATE_ZA_BUFFER)
- MAKE_CASE(AArch64ISD::INIT_TPIDR2OBJ)
- MAKE_CASE(AArch64ISD::GET_SME_SAVE_SIZE)
- MAKE_CASE(AArch64ISD::ALLOC_SME_SAVE_BUFFER)
- MAKE_CASE(AArch64ISD::COALESCER_BARRIER)
- MAKE_CASE(AArch64ISD::VG_SAVE)
- MAKE_CASE(AArch64ISD::VG_RESTORE)
- MAKE_CASE(AArch64ISD::SMSTART)
- MAKE_CASE(AArch64ISD::SMSTOP)
- MAKE_CASE(AArch64ISD::RESTORE_ZA)
- MAKE_CASE(AArch64ISD::RESTORE_ZT)
- MAKE_CASE(AArch64ISD::SAVE_ZT)
- MAKE_CASE(AArch64ISD::CALL)
- MAKE_CASE(AArch64ISD::ADRP)
- MAKE_CASE(AArch64ISD::ADR)
- MAKE_CASE(AArch64ISD::ADDlow)
- MAKE_CASE(AArch64ISD::AUTH_CALL)
- MAKE_CASE(AArch64ISD::AUTH_TC_RETURN)
- MAKE_CASE(AArch64ISD::AUTH_CALL_RVMARKER)
- MAKE_CASE(AArch64ISD::LOADgot)
- MAKE_CASE(AArch64ISD::RET_GLUE)
- MAKE_CASE(AArch64ISD::BRCOND)
- MAKE_CASE(AArch64ISD::CSEL)
- MAKE_CASE(AArch64ISD::CSINV)
- MAKE_CASE(AArch64ISD::CSNEG)
- MAKE_CASE(AArch64ISD::CSINC)
- MAKE_CASE(AArch64ISD::THREAD_POINTER)
- MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
- MAKE_CASE(AArch64ISD::TLSDESC_AUTH_CALLSEQ)
- MAKE_CASE(AArch64ISD::PROBED_ALLOCA)
- MAKE_CASE(AArch64ISD::ABDS_PRED)
- MAKE_CASE(AArch64ISD::ABDU_PRED)
- MAKE_CASE(AArch64ISD::HADDS_PRED)
- MAKE_CASE(AArch64ISD::HADDU_PRED)
- MAKE_CASE(AArch64ISD::MUL_PRED)
- MAKE_CASE(AArch64ISD::MULHS_PRED)
- MAKE_CASE(AArch64ISD::MULHU_PRED)
- MAKE_CASE(AArch64ISD::RHADDS_PRED)
- MAKE_CASE(AArch64ISD::RHADDU_PRED)
- MAKE_CASE(AArch64ISD::SDIV_PRED)
- MAKE_CASE(AArch64ISD::SHL_PRED)
- MAKE_CASE(AArch64ISD::SMAX_PRED)
- MAKE_CASE(AArch64ISD::SMIN_PRED)
- MAKE_CASE(AArch64ISD::SRA_PRED)
- MAKE_CASE(AArch64ISD::SRL_PRED)
- MAKE_CASE(AArch64ISD::UDIV_PRED)
- MAKE_CASE(AArch64ISD::UMAX_PRED)
- MAKE_CASE(AArch64ISD::UMIN_PRED)
- MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
- MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FCVTX_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::ADC)
- MAKE_CASE(AArch64ISD::SBC)
- MAKE_CASE(AArch64ISD::ADDS)
- MAKE_CASE(AArch64ISD::SUBS)
- MAKE_CASE(AArch64ISD::ADCS)
- MAKE_CASE(AArch64ISD::SBCS)
- MAKE_CASE(AArch64ISD::ANDS)
- MAKE_CASE(AArch64ISD::CCMP)
- MAKE_CASE(AArch64ISD::CCMN)
- MAKE_CASE(AArch64ISD::FCCMP)
- MAKE_CASE(AArch64ISD::FCMP)
- MAKE_CASE(AArch64ISD::STRICT_FCMP)
- MAKE_CASE(AArch64ISD::STRICT_FCMPE)
- MAKE_CASE(AArch64ISD::FCVTXN)
- MAKE_CASE(AArch64ISD::SME_ZA_LDR)
- MAKE_CASE(AArch64ISD::SME_ZA_STR)
- MAKE_CASE(AArch64ISD::DUP)
- MAKE_CASE(AArch64ISD::DUPLANE8)
- MAKE_CASE(AArch64ISD::DUPLANE16)
- MAKE_CASE(AArch64ISD::DUPLANE32)
- MAKE_CASE(AArch64ISD::DUPLANE64)
- MAKE_CASE(AArch64ISD::DUPLANE128)
- MAKE_CASE(AArch64ISD::MOVI)
- MAKE_CASE(AArch64ISD::MOVIshift)
- MAKE_CASE(AArch64ISD::MOVIedit)
- MAKE_CASE(AArch64ISD::MOVImsl)
- MAKE_CASE(AArch64ISD::FMOV)
- MAKE_CASE(AArch64ISD::MVNIshift)
- MAKE_CASE(AArch64ISD::MVNImsl)
- MAKE_CASE(AArch64ISD::BICi)
- MAKE_CASE(AArch64ISD::ORRi)
- MAKE_CASE(AArch64ISD::BSP)
- MAKE_CASE(AArch64ISD::ZIP1)
- MAKE_CASE(AArch64ISD::ZIP2)
- MAKE_CASE(AArch64ISD::UZP1)
- MAKE_CASE(AArch64ISD::UZP2)
- MAKE_CASE(AArch64ISD::TRN1)
- MAKE_CASE(AArch64ISD::TRN2)
- MAKE_CASE(AArch64ISD::REV16)
- MAKE_CASE(AArch64ISD::REV32)
- MAKE_CASE(AArch64ISD::REV64)
- MAKE_CASE(AArch64ISD::EXT)
- MAKE_CASE(AArch64ISD::SPLICE)
- MAKE_CASE(AArch64ISD::VSHL)
- MAKE_CASE(AArch64ISD::VLSHR)
- MAKE_CASE(AArch64ISD::VASHR)
- MAKE_CASE(AArch64ISD::VSLI)
- MAKE_CASE(AArch64ISD::VSRI)
- MAKE_CASE(AArch64ISD::FCMEQ)
- MAKE_CASE(AArch64ISD::FCMGE)
- MAKE_CASE(AArch64ISD::FCMGT)
- MAKE_CASE(AArch64ISD::SADDV)
- MAKE_CASE(AArch64ISD::UADDV)
- MAKE_CASE(AArch64ISD::UADDLV)
- MAKE_CASE(AArch64ISD::SADDLV)
- MAKE_CASE(AArch64ISD::SADDWT)
- MAKE_CASE(AArch64ISD::SADDWB)
- MAKE_CASE(AArch64ISD::UADDWT)
- MAKE_CASE(AArch64ISD::UADDWB)
- MAKE_CASE(AArch64ISD::SDOT)
- MAKE_CASE(AArch64ISD::UDOT)
- MAKE_CASE(AArch64ISD::USDOT)
- MAKE_CASE(AArch64ISD::SMINV)
- MAKE_CASE(AArch64ISD::UMINV)
- MAKE_CASE(AArch64ISD::SMAXV)
- MAKE_CASE(AArch64ISD::UMAXV)
- MAKE_CASE(AArch64ISD::SADDV_PRED)
- MAKE_CASE(AArch64ISD::UADDV_PRED)
- MAKE_CASE(AArch64ISD::SMAXV_PRED)
- MAKE_CASE(AArch64ISD::UMAXV_PRED)
- MAKE_CASE(AArch64ISD::SMINV_PRED)
- MAKE_CASE(AArch64ISD::UMINV_PRED)
- MAKE_CASE(AArch64ISD::ORV_PRED)
- MAKE_CASE(AArch64ISD::EORV_PRED)
- MAKE_CASE(AArch64ISD::ANDV_PRED)
- MAKE_CASE(AArch64ISD::CLASTA_N)
- MAKE_CASE(AArch64ISD::CLASTB_N)
- MAKE_CASE(AArch64ISD::LASTA)
- MAKE_CASE(AArch64ISD::LASTB)
- MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
- MAKE_CASE(AArch64ISD::LS64_BUILD)
- MAKE_CASE(AArch64ISD::LS64_EXTRACT)
- MAKE_CASE(AArch64ISD::TBL)
- MAKE_CASE(AArch64ISD::FADD_PRED)
- MAKE_CASE(AArch64ISD::FADDA_PRED)
- MAKE_CASE(AArch64ISD::FADDV_PRED)
- MAKE_CASE(AArch64ISD::FDIV_PRED)
- MAKE_CASE(AArch64ISD::FMA_PRED)
- MAKE_CASE(AArch64ISD::FMAX_PRED)
- MAKE_CASE(AArch64ISD::FMAXV_PRED)
- MAKE_CASE(AArch64ISD::FMAXNM_PRED)
- MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
- MAKE_CASE(AArch64ISD::FMIN_PRED)
- MAKE_CASE(AArch64ISD::FMINV_PRED)
- MAKE_CASE(AArch64ISD::FMINNM_PRED)
- MAKE_CASE(AArch64ISD::FMINNMV_PRED)
- MAKE_CASE(AArch64ISD::FMUL_PRED)
- MAKE_CASE(AArch64ISD::FSUB_PRED)
- MAKE_CASE(AArch64ISD::RDSVL)
- MAKE_CASE(AArch64ISD::BIC)
- MAKE_CASE(AArch64ISD::CBZ)
- MAKE_CASE(AArch64ISD::CBNZ)
- MAKE_CASE(AArch64ISD::TBZ)
- MAKE_CASE(AArch64ISD::TBNZ)
- MAKE_CASE(AArch64ISD::TC_RETURN)
- MAKE_CASE(AArch64ISD::PREFETCH)
- MAKE_CASE(AArch64ISD::SITOF)
- MAKE_CASE(AArch64ISD::UITOF)
- MAKE_CASE(AArch64ISD::NVCAST)
- MAKE_CASE(AArch64ISD::MRS)
- MAKE_CASE(AArch64ISD::SQSHL_I)
- MAKE_CASE(AArch64ISD::UQSHL_I)
- MAKE_CASE(AArch64ISD::SRSHR_I)
- MAKE_CASE(AArch64ISD::URSHR_I)
- MAKE_CASE(AArch64ISD::SQSHLU_I)
- MAKE_CASE(AArch64ISD::WrapperLarge)
- MAKE_CASE(AArch64ISD::LD2post)
- MAKE_CASE(AArch64ISD::LD3post)
- MAKE_CASE(AArch64ISD::LD4post)
- MAKE_CASE(AArch64ISD::ST2post)
- MAKE_CASE(AArch64ISD::ST3post)
- MAKE_CASE(AArch64ISD::ST4post)
- MAKE_CASE(AArch64ISD::LD1x2post)
- MAKE_CASE(AArch64ISD::LD1x3post)
- MAKE_CASE(AArch64ISD::LD1x4post)
- MAKE_CASE(AArch64ISD::ST1x2post)
- MAKE_CASE(AArch64ISD::ST1x3post)
- MAKE_CASE(AArch64ISD::ST1x4post)
- MAKE_CASE(AArch64ISD::LD1DUPpost)
- MAKE_CASE(AArch64ISD::LD2DUPpost)
- MAKE_CASE(AArch64ISD::LD3DUPpost)
- MAKE_CASE(AArch64ISD::LD4DUPpost)
- MAKE_CASE(AArch64ISD::LD1LANEpost)
- MAKE_CASE(AArch64ISD::LD2LANEpost)
- MAKE_CASE(AArch64ISD::LD3LANEpost)
- MAKE_CASE(AArch64ISD::LD4LANEpost)
- MAKE_CASE(AArch64ISD::ST2LANEpost)
- MAKE_CASE(AArch64ISD::ST3LANEpost)
- MAKE_CASE(AArch64ISD::ST4LANEpost)
- MAKE_CASE(AArch64ISD::SMULL)
- MAKE_CASE(AArch64ISD::UMULL)
- MAKE_CASE(AArch64ISD::PMULL)
- MAKE_CASE(AArch64ISD::FRECPE)
- MAKE_CASE(AArch64ISD::FRECPS)
- MAKE_CASE(AArch64ISD::FRSQRTE)
- MAKE_CASE(AArch64ISD::FRSQRTS)
- MAKE_CASE(AArch64ISD::STG)
- MAKE_CASE(AArch64ISD::STZG)
- MAKE_CASE(AArch64ISD::ST2G)
- MAKE_CASE(AArch64ISD::STZ2G)
- MAKE_CASE(AArch64ISD::SUNPKHI)
- MAKE_CASE(AArch64ISD::SUNPKLO)
- MAKE_CASE(AArch64ISD::UUNPKHI)
- MAKE_CASE(AArch64ISD::UUNPKLO)
- MAKE_CASE(AArch64ISD::INSR)
- MAKE_CASE(AArch64ISD::PTEST)
- MAKE_CASE(AArch64ISD::PTEST_ANY)
- MAKE_CASE(AArch64ISD::PTRUE)
- MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1Q_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1Q_INDEX_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::SST1Q_PRED)
- MAKE_CASE(AArch64ISD::SST1Q_INDEX_PRED)
- MAKE_CASE(AArch64ISD::ST1_PRED)
- MAKE_CASE(AArch64ISD::SST1_PRED)
- MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
- MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
- MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
- MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
- MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
- MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
- MAKE_CASE(AArch64ISD::SSTNT1_PRED)
- MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
- MAKE_CASE(AArch64ISD::LDP)
- MAKE_CASE(AArch64ISD::LDIAPP)
- MAKE_CASE(AArch64ISD::LDNP)
- MAKE_CASE(AArch64ISD::STP)
- MAKE_CASE(AArch64ISD::STILP)
- MAKE_CASE(AArch64ISD::STNP)
- MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::REVD_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::INDEX_VECTOR)
- MAKE_CASE(AArch64ISD::ADDP)
- MAKE_CASE(AArch64ISD::SADDLP)
- MAKE_CASE(AArch64ISD::UADDLP)
- MAKE_CASE(AArch64ISD::CALL_RVMARKER)
- MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
- MAKE_CASE(AArch64ISD::CALL_BTI)
- MAKE_CASE(AArch64ISD::MRRS)
- MAKE_CASE(AArch64ISD::MSRR)
- MAKE_CASE(AArch64ISD::RSHRNB_I)
- MAKE_CASE(AArch64ISD::CTTZ_ELTS)
- MAKE_CASE(AArch64ISD::CALL_ARM64EC_TO_X64)
- MAKE_CASE(AArch64ISD::URSHR_I_PRED)
- MAKE_CASE(AArch64ISD::CB)
- }
-#undef MAKE_CASE
- return nullptr;
-}
-
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -23299,12 +22973,6 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
- assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
- Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
- (Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
- Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
- "Invalid opcode.");
-
const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index c1e6d70099fa5..1924d20f67f49 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -23,502 +23,6 @@
namespace llvm {
-namespace AArch64ISD {
-
-// For predicated nodes where the result is a vector, the operation is
-// controlled by a governing predicate and the inactive lanes are explicitly
-// defined with a value, please stick the following naming convention:
-//
-// _MERGE_OP<n> The result value is a vector with inactive lanes equal
-// to source operand OP<n>.
-//
-// _MERGE_ZERO The result value is a vector with inactive lanes
-// actively zeroed.
-//
-// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
-// to the last source operand which only purpose is being
-// a passthru value.
-//
-// For other cases where no explicit action is needed to set the inactive lanes,
-// or when the result is not a vector and it is needed or helpful to
-// distinguish a node from similar unpredicated nodes, use:
-//
-// _PRED
-//
-enum NodeType : unsigned {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
- CALL, // Function call.
-
- // Pseudo for a OBJC call that gets emitted together with a special `mov
- // x29, x29` marker instruction.
- CALL_RVMARKER,
-
- CALL_BTI, // Function call followed by a BTI instruction.
-
- // Function call, authenticating the callee value first:
- // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
- AUTH_CALL,
- // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
- // operands.
- AUTH_TC_RETURN,
-
- // Authenticated variant of CALL_RVMARKER.
- AUTH_CALL_RVMARKER,
-
- COALESCER_BARRIER,
-
- VG_SAVE,
- VG_RESTORE,
-
- SMSTART,
- SMSTOP,
- RESTORE_ZA,
- RESTORE_ZT,
- SAVE_ZT,
-
- // A call with the callee in x16, i.e. "blr x16".
- CALL_ARM64EC_TO_X64,
-
- // Produces the full sequence of instructions for getting the thread pointer
- // offset of a variable into X0, using the TLSDesc model.
- TLSDESC_CALLSEQ,
- TLSDESC_AUTH_CALLSEQ,
- ADRP, // Page address of a TargetGlobalAddress operand.
- ADR, // ADR
- ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
- LOADgot, // Load from automatically generated descriptor (e.g. Global
- // Offset Table, TLS record).
- RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
- BRCOND, // Conditional branch instruction; "b.cond".
- CSEL,
- CSINV, // Conditional select invert.
- CSNEG, // Conditional select negate.
- CSINC, // Conditional select increment.
-
- // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
- // ELF.
- THREAD_POINTER,
- ADC,
- SBC, // adc, sbc instructions
-
- // To avoid stack clash, allocation is performed by block and each block is
- // probed.
- PROBED_ALLOCA,
-
- // Predicated instructions where inactive lanes produce undefined results.
- ABDS_PRED,
- ABDU_PRED,
- FADD_PRED,
- FDIV_PRED,
- FMA_PRED,
- FMAX_PRED,
- FMAXNM_PRED,
- FMIN_PRED,
- FMINNM_PRED,
- FMUL_PRED,
- FSUB_PRED,
- HADDS_PRED,
- HADDU_PRED,
- MUL_PRED,
- MULHS_PRED,
- MULHU_PRED,
- RHADDS_PRED,
- RHADDU_PRED,
- SDIV_PRED,
- SHL_PRED,
- SMAX_PRED,
- SMIN_PRED,
- SRA_PRED,
- SRL_PRED,
- UDIV_PRED,
- UMAX_PRED,
- UMIN_PRED,
-
- // Unpredicated vector instructions
- BIC,
-
- SRAD_MERGE_OP1,
-
- // Predicated instructions with the result of inactive lanes provided by the
- // last operand.
- FABS_MERGE_PASSTHRU,
- FCEIL_MERGE_PASSTHRU,
- FFLOOR_MERGE_PASSTHRU,
- FNEARBYINT_MERGE_PASSTHRU,
- FNEG_MERGE_PASSTHRU,
- FRECPX_MERGE_PASSTHRU,
- FRINT_MERGE_PASSTHRU,
- FROUND_MERGE_PASSTHRU,
- FROUNDEVEN_MERGE_PASSTHRU,
- FSQRT_MERGE_PASSTHRU,
- FTRUNC_MERGE_PASSTHRU,
- FP_ROUND_MERGE_PASSTHRU,
- FP_EXTEND_MERGE_PASSTHRU,
- UINT_TO_FP_MERGE_PASSTHRU,
- SINT_TO_FP_MERGE_PASSTHRU,
- FCVTX_MERGE_PASSTHRU,
- FCVTZU_MERGE_PASSTHRU,
- FCVTZS_MERGE_PASSTHRU,
- SIGN_EXTEND_INREG_MERGE_PASSTHRU,
- ZERO_EXTEND_INREG_MERGE_PASSTHRU,
- ABS_MERGE_PASSTHRU,
- NEG_MERGE_PASSTHRU,
-
- SETCC_MERGE_ZERO,
-
- // Arithmetic instructions which write flags.
- ADDS,
- SUBS,
- ADCS,
- SBCS,
- ANDS,
-
- // Conditional compares. Operands: left,right,falsecc,cc,flags
- CCMP,
- CCMN,
- FCCMP,
-
- // Floating point comparison
- FCMP,
-
- // Scalar-to-vector duplication
- DUP,
- DUPLANE8,
- DUPLANE16,
- DUPLANE32,
- DUPLANE64,
- DUPLANE128,
-
- // Vector immedate moves
- MOVI,
- MOVIshift,
- MOVIedit,
- MOVImsl,
- FMOV,
- MVNIshift,
- MVNImsl,
-
- // Vector immediate ops
- BICi,
- ORRi,
-
- // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
- // element must be identical.
- BSP,
-
- // Vector shuffles
- ZIP1,
- ZIP2,
- UZP1,
- UZP2,
- TRN1,
- TRN2,
- REV16,
- REV32,
- REV64,
- EXT,
- SPLICE,
-
- // Vector shift by scalar
- VSHL,
- VLSHR,
- VASHR,
-
- // Vector shift by scalar (again)
- SQSHL_I,
- UQSHL_I,
- SQSHLU_I,
- SRSHR_I,
- URSHR_I,
- URSHR_I_PRED,
-
- // Vector narrowing shift by immediate (bottom)
- RSHRNB_I,
-
- // Vector shift by constant and insert
- VSLI,
- VSRI,
-
- // Vector comparisons
- FCMEQ,
- FCMGE,
- FCMGT,
-
- // Round wide FP to narrow FP with inexact results to odd.
- FCVTXN,
-
- // Vector across-lanes addition
- // Only the lower result lane is defined.
- SADDV,
- UADDV,
-
- // Unsigned sum Long across Vector
- UADDLV,
- SADDLV,
-
- // Wide adds
- SADDWT,
- SADDWB,
- UADDWT,
- UADDWB,
-
- // Add Pairwise of two vectors
- ADDP,
- // Add Long Pairwise
- SADDLP,
- UADDLP,
-
- // udot/sdot/usdot instructions
- UDOT,
- SDOT,
- USDOT,
-
- // Vector across-lanes min/max
- // Only the lower result lane is defined.
- SMINV,
- UMINV,
- SMAXV,
- UMAXV,
-
- SADDV_PRED,
- UADDV_PRED,
- SMAXV_PRED,
- UMAXV_PRED,
- SMINV_PRED,
- UMINV_PRED,
- ORV_PRED,
- EORV_PRED,
- ANDV_PRED,
-
- // Compare-and-branch
- CBZ,
- CBNZ,
- TBZ,
- TBNZ,
-
- // Tail calls
- TC_RETURN,
-
- // Custom prefetch handling
- PREFETCH,
-
- // {s|u}int to FP within a FP register.
- SITOF,
- UITOF,
-
- /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
- /// world w.r.t vectors; which causes additional REV instructions to be
- /// generated to compensate for the byte-swapping. But sometimes we do
- /// need to re-interpret the data in SIMD vector registers in big-endian
- /// mode without emitting such REV instructions.
- NVCAST,
-
- MRS, // MRS, also sets the flags via a glue.
-
- SMULL,
- UMULL,
-
- PMULL,
-
- // Reciprocal estimates and steps.
- FRECPE,
- FRECPS,
- FRSQRTE,
- FRSQRTS,
-
- SUNPKHI,
- SUNPKLO,
- UUNPKHI,
- UUNPKLO,
-
- CLASTA_N,
- CLASTB_N,
- LASTA,
- LASTB,
- TBL,
-
- // Floating-point reductions.
- FADDA_PRED,
- FADDV_PRED,
- FMAXV_PRED,
- FMAXNMV_PRED,
- FMINV_PRED,
- FMINNMV_PRED,
-
- INSR,
- PTEST,
- PTEST_ANY,
- PTRUE,
-
- CTTZ_ELTS,
-
- BITREVERSE_MERGE_PASSTHRU,
- BSWAP_MERGE_PASSTHRU,
- REVH_MERGE_PASSTHRU,
- REVW_MERGE_PASSTHRU,
- CTLZ_MERGE_PASSTHRU,
- CTPOP_MERGE_PASSTHRU,
- DUP_MERGE_PASSTHRU,
- INDEX_VECTOR,
-
- // Cast between vectors of the same element type but differ in length.
- REINTERPRET_CAST,
-
- // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
- LS64_BUILD,
- LS64_EXTRACT,
-
- LD1_MERGE_ZERO,
- LD1S_MERGE_ZERO,
- LDNF1_MERGE_ZERO,
- LDNF1S_MERGE_ZERO,
- LDFF1_MERGE_ZERO,
- LDFF1S_MERGE_ZERO,
- LD1RQ_MERGE_ZERO,
- LD1RO_MERGE_ZERO,
-
- // Structured loads.
- SVE_LD2_MERGE_ZERO,
- SVE_LD3_MERGE_ZERO,
- SVE_LD4_MERGE_ZERO,
-
- // Unsigned gather loads.
- GLD1_MERGE_ZERO,
- GLD1_SCALED_MERGE_ZERO,
- GLD1_UXTW_MERGE_ZERO,
- GLD1_SXTW_MERGE_ZERO,
- GLD1_UXTW_SCALED_MERGE_ZERO,
- GLD1_SXTW_SCALED_MERGE_ZERO,
- GLD1_IMM_MERGE_ZERO,
- GLD1Q_MERGE_ZERO,
- GLD1Q_INDEX_MERGE_ZERO,
-
- // Signed gather loads
- GLD1S_MERGE_ZERO,
- GLD1S_SCALED_MERGE_ZERO,
- GLD1S_UXTW_MERGE_ZERO,
- GLD1S_SXTW_MERGE_ZERO,
- GLD1S_UXTW_SCALED_MERGE_ZERO,
- GLD1S_SXTW_SCALED_MERGE_ZERO,
- GLD1S_IMM_MERGE_ZERO,
-
- // Unsigned gather loads.
- GLDFF1_MERGE_ZERO,
- GLDFF1_SCALED_MERGE_ZERO,
- GLDFF1_UXTW_MERGE_ZERO,
- GLDFF1_SXTW_MERGE_ZERO,
- GLDFF1_UXTW_SCALED_MERGE_ZERO,
- GLDFF1_SXTW_SCALED_MERGE_ZERO,
- GLDFF1_IMM_MERGE_ZERO,
-
- // Signed gather loads.
- GLDFF1S_MERGE_ZERO,
- GLDFF1S_SCALED_MERGE_ZERO,
- GLDFF1S_UXTW_MERGE_ZERO,
- GLDFF1S_SXTW_MERGE_ZERO,
- GLDFF1S_UXTW_SCALED_MERGE_ZERO,
- GLDFF1S_SXTW_SCALED_MERGE_ZERO,
- GLDFF1S_IMM_MERGE_ZERO,
-
- // Non-temporal gather loads
- GLDNT1_MERGE_ZERO,
- GLDNT1_INDEX_MERGE_ZERO,
- GLDNT1S_MERGE_ZERO,
-
- // Contiguous masked store.
- ST1_PRED,
-
- // Scatter store
- SST1_PRED,
- SST1_SCALED_PRED,
- SST1_UXTW_PRED,
- SST1_SXTW_PRED,
- SST1_UXTW_SCALED_PRED,
- SST1_SXTW_SCALED_PRED,
- SST1_IMM_PRED,
- SST1Q_PRED,
- SST1Q_INDEX_PRED,
-
- // Non-temporal scatter store
- SSTNT1_PRED,
- SSTNT1_INDEX_PRED,
-
- // SME
- RDSVL,
- REVD_MERGE_PASSTHRU,
- ALLOCATE_ZA_BUFFER,
- INIT_TPIDR2OBJ,
-
- // Needed for __arm_agnostic("sme_za_state")
- GET_SME_SAVE_SIZE,
- ALLOC_SME_SAVE_BUFFER,
-
- // Asserts that a function argument (i32) is zero-extended to i8 by
- // the caller
- ASSERT_ZEXT_BOOL,
-
- // 128-bit system register accesses
- // lo64, hi64, chain = MRRS(chain, sysregname)
- MRRS,
- // chain = MSRR(chain, sysregname, lo64, hi64)
- MSRR,
-
- // Strict (exception-raising) floating point comparison
- FIRST_STRICTFP_OPCODE,
- STRICT_FCMP = FIRST_STRICTFP_OPCODE,
- STRICT_FCMPE,
- LAST_STRICTFP_OPCODE = STRICT_FCMPE,
-
- // NEON Load/Store with post-increment base updates
- FIRST_MEMORY_OPCODE,
- LD2post = FIRST_MEMORY_OPCODE,
- LD3post,
- LD4post,
- ST2post,
- ST3post,
- ST4post,
- LD1x2post,
- LD1x3post,
- LD1x4post,
- ST1x2post,
- ST1x3post,
- ST1x4post,
- LD1DUPpost,
- LD2DUPpost,
- LD3DUPpost,
- LD4DUPpost,
- LD1LANEpost,
- LD2LANEpost,
- LD3LANEpost,
- LD4LANEpost,
- ST2LANEpost,
- ST3LANEpost,
- ST4LANEpost,
-
- STG,
- STZG,
- ST2G,
- STZ2G,
-
- LDP,
- LDIAPP,
- LDNP,
- STP,
- STILP,
- STNP,
- LAST_MEMORY_OPCODE = STNP,
-
- // SME ZA loads and stores
- SME_ZA_LDR,
- SME_ZA_STR,
-
- // Compare-and-branch
- CB,
-};
-
-} // end namespace AArch64ISD
-
namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
@@ -625,8 +129,6 @@ class AArch64TargetLowering : public TargetLowering {
/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- const char *getTargetNodeName(unsigned Opcode) const override;
-
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
/// This method returns a target specific FastISel object, or null if the
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 010c7c391527f..cba2559826d97 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -797,10 +797,14 @@ def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
-def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
- [SDNPHasChain]>;
-def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
- [SDNPHasChain]>;
+
+let IsStrictFP = true in {
+ def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
+ [SDNPHasChain]>;
+ def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
+ [SDNPHasChain]>;
+}
+
def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
[(AArch64strict_fcmp node:$lhs, node:$rhs),
(AArch64fcmp node:$lhs, node:$rhs)]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 2273e1c0ffa6e..1c903765efc5a 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -10,9 +10,13 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64SelectionDAGInfo.h"
#include "AArch64TargetMachine.h"
#include "Utils/AArch64SMEAttributes.h"
+#define GET_SDNODE_DESC
+#include "AArch64GenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
@@ -23,14 +27,62 @@ static cl::opt<bool>
"to lower to librt functions"),
cl::init(true));
-bool AArch64SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= AArch64ISD::FIRST_MEMORY_OPCODE &&
- Opcode <= AArch64ISD::LAST_MEMORY_OPCODE;
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(AArch64GenSDNodeInfo) {}
+
+const char *AArch64SelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V) \
+ case V: \
+ return #V;
+
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<AArch64ISD::NodeType>(Opcode)) {
+ MAKE_CASE(AArch64ISD::LD2post)
+ MAKE_CASE(AArch64ISD::LD3post)
+ MAKE_CASE(AArch64ISD::LD4post)
+ MAKE_CASE(AArch64ISD::ST2post)
+ MAKE_CASE(AArch64ISD::ST3post)
+ MAKE_CASE(AArch64ISD::ST4post)
+ MAKE_CASE(AArch64ISD::LD1x2post)
+ MAKE_CASE(AArch64ISD::LD1x3post)
+ MAKE_CASE(AArch64ISD::LD1x4post)
+ MAKE_CASE(AArch64ISD::ST1x2post)
+ MAKE_CASE(AArch64ISD::ST1x3post)
+ MAKE_CASE(AArch64ISD::ST1x4post)
+ MAKE_CASE(AArch64ISD::LD1DUPpost)
+ MAKE_CASE(AArch64ISD::LD2DUPpost)
+ MAKE_CASE(AArch64ISD::LD3DUPpost)
+ MAKE_CASE(AArch64ISD::LD4DUPpost)
+ MAKE_CASE(AArch64ISD::LD1LANEpost)
+ MAKE_CASE(AArch64ISD::LD2LANEpost)
+ MAKE_CASE(AArch64ISD::LD3LANEpost)
+ MAKE_CASE(AArch64ISD::LD4LANEpost)
+ MAKE_CASE(AArch64ISD::ST2LANEpost)
+ MAKE_CASE(AArch64ISD::ST3LANEpost)
+ MAKE_CASE(AArch64ISD::ST4LANEpost)
+ MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1Q_INDEX_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SST1Q_INDEX_PRED)
+ MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
+ MAKE_CASE(AArch64ISD::INDEX_VECTOR)
+ MAKE_CASE(AArch64ISD::MRRS)
+ MAKE_CASE(AArch64ISD::MSRR)
+ }
+#undef MAKE_CASE
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
}
-bool AArch64SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
- return Opcode >= AArch64ISD::FIRST_STRICTFP_OPCODE &&
- Opcode <= AArch64ISD::LAST_STRICTFP_OPCODE;
+bool AArch64SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
+ // These nodes don't have corresponding entries in *.td files yet.
+ if (Opcode >= AArch64ISD::FIRST_MEMORY_OPCODE &&
+ Opcode <= AArch64ISD::LAST_MEMORY_OPCODE)
+ return true;
+
+ return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
}
void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
@@ -38,7 +90,34 @@ void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
#ifndef NDEBUG
switch (N->getOpcode()) {
default:
- break;
+ return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+ case AArch64ISD::GLDFF1S_IMM_MERGE_ZERO:
+ case AArch64ISD::GLDFF1S_MERGE_ZERO:
+ case AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO:
+ case AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO:
+ case AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::LDFF1S_MERGE_ZERO:
+ case AArch64ISD::LDFF1_MERGE_ZERO:
+ case AArch64ISD::LDNF1S_MERGE_ZERO:
+ case AArch64ISD::LDNF1_MERGE_ZERO:
+ // invalid number of results; expected 3, got 2
+ case AArch64ISD::SMSTOP:
+ case AArch64ISD::COALESCER_BARRIER:
+ // invalid number of results; expected 2, got 1
+ case AArch64ISD::SMSTART:
+ // variadic operand #3 must be Register or RegisterMask
+ case AArch64ISD::REVD_MERGE_PASSTHRU:
+ // invalid number of operands; expected 3, got 4
+ return;
case AArch64ISD::SADDWT:
case AArch64ISD::SADDWB:
case AArch64ISD::UADDWT:
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 9c11833b3f67e..4f84672053a3c 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -14,14 +14,97 @@
#define LLVM_LIB_TARGET_AARCH64_AARCH64SELECTIONDAGINFO_H
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/IR/RuntimeLibcalls.h"
+
+#define GET_SDNODE_ENUM
+#include "AArch64GenSDNodeInfo.inc"
namespace llvm {
+namespace AArch64ISD {
+
+// For predicated nodes where the result is a vector, the operation is
+// controlled by a governing predicate and the inactive lanes are explicitly
+// defined with a value, please stick the following naming convention:
+//
+// _MERGE_OP<n> The result value is a vector with inactive lanes equal
+// to source operand OP<n>.
+//
+// _MERGE_ZERO The result value is a vector with inactive lanes
+// actively zeroed.
+//
+// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
+// to the last source operand which only purpose is being
+// a passthru value.
+//
+// For other cases where no explicit action is needed to set the inactive lanes,
+// or when the result is not a vector and it is needed or helpful to
+// distinguish a node from similar unpredicated nodes, use:
+//
+// _PRED
+//
+enum NodeType : unsigned {
+ INDEX_VECTOR = GENERATED_OPCODE_END,
+
+ // Structured loads.
+ SVE_LD2_MERGE_ZERO,
+ SVE_LD3_MERGE_ZERO,
+ SVE_LD4_MERGE_ZERO,
+
+ // Unsigned gather loads.
+ GLD1Q_INDEX_MERGE_ZERO,
-class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
+ // Non-temporal gather loads
+ GLDNT1_INDEX_MERGE_ZERO,
+
+ // Scatter store
+ SST1Q_INDEX_PRED,
+
+ // Non-temporal scatter store
+ SSTNT1_INDEX_PRED,
+
+ // 128-bit system register accesses
+ // lo64, hi64, chain = MRRS(chain, sysregname)
+ MRRS,
+ // chain = MSRR(chain, sysregname, lo64, hi64)
+ MSRR,
+
+ // NEON Load/Store with post-increment base updates
+ FIRST_MEMORY_OPCODE,
+ LD2post = FIRST_MEMORY_OPCODE,
+ LD3post,
+ LD4post,
+ ST2post,
+ ST3post,
+ ST4post,
+ LD1x2post,
+ LD1x3post,
+ LD1x4post,
+ ST1x2post,
+ ST1x3post,
+ ST1x4post,
+ LD1DUPpost,
+ LD2DUPpost,
+ LD3DUPpost,
+ LD4DUPpost,
+ LD1LANEpost,
+ LD2LANEpost,
+ LD3LANEpost,
+ LD4LANEpost,
+ ST2LANEpost,
+ ST3LANEpost,
+ ST4LANEpost,
+ LAST_MEMORY_OPCODE = ST4LANEpost,
+};
+
+} // namespace AArch64ISD
+
+class AArch64SelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
- bool isTargetMemoryOpcode(unsigned Opcode) const override;
+ AArch64SelectionDAGInfo();
- bool isTargetStrictFPOpcode(unsigned Opcode) const override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ bool isTargetMemoryOpcode(unsigned Opcode) const override;
void verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const override;
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 583003f2f46e6..9cf6f8a86b7d6 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -24,6 +24,7 @@ tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM AArch64GenRegisterBank.inc -gen-register-bank)
tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AArch64GenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM AArch64GenSystemOperands.inc -gen-searchable-tables)
tablegen(LLVM AArch64GenExegesis.inc -gen-exegesis)
diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt
index d1677cdaeceac..8b025219c46cf 100644
--- a/llvm/unittests/CodeGen/CMakeLists.txt
+++ b/llvm/unittests/CodeGen/CMakeLists.txt
@@ -20,7 +20,6 @@ set(LLVM_LINK_COMPONENTS
)
add_llvm_unittest(CodeGenTests
- AArch64SelectionDAGTest.cpp
AllocationOrderTest.cpp
AMDGPUMetadataTest.cpp
AsmPrinterDwarfTest.cpp
diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
similarity index 97%
rename from llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
rename to llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index cf92bdc281637..6d0635df7b61e 100644
--- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -1,4 +1,4 @@
-//===- llvm/unittest/CodeGen/AArch64SelectionDAGTest.cpp -------------------------===//
+//===- llvm/unittest/CodeGen/AArch64SelectionDAGTest.cpp ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "../lib/Target/AArch64/AArch64ISelLowering.h"
+#include "AArch64SelectionDAGInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/AsmParser/Parser.h"
@@ -27,8 +27,9 @@ namespace llvm {
class AArch64SelectionDAGTest : public testing::Test {
protected:
static void SetUpTestCase() {
- InitializeAllTargets();
- InitializeAllTargetMCs();
+ LLVMInitializeAArch64TargetInfo();
+ LLVMInitializeAArch64Target();
+ LLVMInitializeAArch64TargetMC();
}
void SetUp() override {
@@ -37,18 +38,11 @@ class AArch64SelectionDAGTest : public testing::Test {
Triple TargetTriple("aarch64--");
std::string Error;
const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
- // FIXME: These tests do not depend on AArch64 specifically, but we have to
- // initialize a target. A skeleton Target for unittests would allow us to
- // always run these tests.
- if (!T)
- GTEST_SKIP();
TargetOptions Options;
TM = std::unique_ptr<TargetMachine>(
T->createTargetMachine(TargetTriple, "", "+sve", Options, std::nullopt,
std::nullopt, CodeGenOptLevel::Aggressive));
- if (!TM)
- GTEST_SKIP();
SMDiagnostic SMError;
M = parseAssemblyString(Assembly, SMError, Context);
@@ -144,7 +138,8 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_SIGN_EXTEND_VECTOR_INREG) {
EXPECT_EQ(DAG->ComputeNumSignBits(Op, DemandedElts), 15u);
}
-TEST_F(AArch64SelectionDAGTest, ComputeNumSignBitsSVE_SIGN_EXTEND_VECTOR_INREG) {
+TEST_F(AArch64SelectionDAGTest,
+ ComputeNumSignBitsSVE_SIGN_EXTEND_VECTOR_INREG) {
SDLoc Loc;
auto Int8VT = EVT::getIntegerVT(Context, 8);
auto Int16VT = EVT::getIntegerVT(Context, 16);
@@ -453,7 +448,7 @@ TEST_F(AArch64SelectionDAGTest, isSplatValue_Scalable_SPLAT_VECTOR) {
EXPECT_TRUE(DAG->isSplatValue(Op, /*AllowUndefs=*/false));
APInt UndefElts;
- APInt DemandedElts(1,1);
+ APInt DemandedElts(1, 1);
EXPECT_TRUE(DAG->isSplatValue(Op, DemandedElts, UndefElts));
}
@@ -492,7 +487,8 @@ TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Fixed_BUILD_VECTOR) {
EXPECT_EQ(SplatIdx, 0);
}
-TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Fixed_ADD_of_BUILD_VECTOR) {
+TEST_F(AArch64SelectionDAGTest,
+ getSplatSourceVector_Fixed_ADD_of_BUILD_VECTOR) {
TargetLowering TL(*TM);
SDLoc Loc;
@@ -525,7 +521,8 @@ TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Scalable_SPLAT_VECTOR) {
EXPECT_EQ(SplatIdx, 0);
}
-TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Scalable_ADD_of_SPLAT_VECTOR) {
+TEST_F(AArch64SelectionDAGTest,
+ getSplatSourceVector_Scalable_ADD_of_SPLAT_VECTOR) {
TargetLowering TL(*TM);
SDLoc Loc;
@@ -560,7 +557,7 @@ TEST_F(AArch64SelectionDAGTest, getRepeatedSequence_Patterns) {
// Build some repeating sequences.
SmallVector<SDValue, 16> Pattern1111, Pattern1133, Pattern0123;
- for(int I = 0; I != 4; ++I) {
+ for (int I = 0; I != 4; ++I) {
Pattern1111.append(4, Val1);
Pattern1133.append(2, Val1);
Pattern1133.append(2, Val3);
@@ -597,7 +594,7 @@ TEST_F(AArch64SelectionDAGTest, getRepeatedSequence_Patterns) {
cast<BuildVectorSDNode>(DAG->getBuildVector(VecVT, Loc, Pattern1111));
auto *BV1133 =
cast<BuildVectorSDNode>(DAG->getBuildVector(VecVT, Loc, Pattern1133));
- auto *BV0123=
+ auto *BV0123 =
cast<BuildVectorSDNode>(DAG->getBuildVector(VecVT, Loc, Pattern0123));
auto *BV022 =
cast<BuildVectorSDNode>(DAG->getBuildVector(VecVT, Loc, Pattern022));
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index 449888838acdc..67eb508e9bab8 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
AArch64Desc
AArch64Info
AArch64Utils
+ Analysis
AsmParser
CodeGen
CodeGenTypes
@@ -30,5 +31,6 @@ add_llvm_target_unittest(AArch64Tests
SMEAttributesTest.cpp
AArch64RegisterInfoTest.cpp
AArch64SVESchedPseudoTest.cpp
+ AArch64SelectionDAGTest.cpp
Immediates.cpp
)
>From 757923bb069bef02afbe8ca4eaa3f43baab7c7cd Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Sat, 17 May 2025 19:37:32 +0000
Subject: [PATCH 2/4] Register missing SDNodes in TableGen
This also removes some unused SDNodes (rather than adding the definitions)
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 60 -------------
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 85 +++++++++++++++++--
.../AArch64/AArch64SelectionDAGInfo.cpp | 55 ------------
.../Target/AArch64/AArch64SelectionDAGInfo.h | 83 +-----------------
4 files changed, 80 insertions(+), 203 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 96fa85179d023..2eb8c6008db0f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7216,57 +7216,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
- case AArch64ISD::SVE_LD2_MERGE_ZERO: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
- return;
- }
- break;
- }
- case AArch64ISD::SVE_LD3_MERGE_ZERO: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
- return;
- }
- break;
- }
- case AArch64ISD::SVE_LD4_MERGE_ZERO: {
- if (VT == MVT::nxv16i8) {
- SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
- return;
- } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- VT == MVT::nxv8bf16) {
- SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
- return;
- } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
- return;
- } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
- return;
- }
- break;
- }
}
// Select the default instruction
@@ -7340,15 +7289,6 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
return cast<VTSDNode>(Root->getOperand(3))->getVT();
case AArch64ISD::ST1_PRED:
return cast<VTSDNode>(Root->getOperand(4))->getVT();
- case AArch64ISD::SVE_LD2_MERGE_ZERO:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
- case AArch64ISD::SVE_LD3_MERGE_ZERO:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
- case AArch64ISD::SVE_LD4_MERGE_ZERO:
- return getPackedVectorTypeFromPredicateType(
- Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
default:
break;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index cba2559826d97..97fc0c4ae9615 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1044,6 +1044,19 @@ def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTCisVT<2, i32>]>,
[SDNPHasChain]>;
+// 128-bit system register accesses
+// lo64, hi64, chain = MRRS(chain, sysregname)
+def AArch64mrrs : SDNode<"AArch64ISD::MRRS",
+ SDTypeProfile<2, 1, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>]>,
+ [SDNPHasChain]>;
+
+// chain = MSRR(chain, sysregname, lo64, hi64)
+def AArch64msrr : SDNode<"AArch64ISD::MSRR",
+ SDTypeProfile<0, 3, [SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>]>,
+ [SDNPHasChain]>;
+
def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
@@ -1053,6 +1066,66 @@ def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
[SDTCisInt<0>, SDTCisVec<1>]>, []>;
+def AArch64ld2post : SDNode<"AArch64ISD::LD2post", SDTypeProfile<3, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld3post : SDNode<"AArch64ISD::LD3post", SDTypeProfile<4, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld4post : SDNode<"AArch64ISD::LD4post", SDTypeProfile<5, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64st2post : SDNode<"AArch64ISD::ST2post", SDTypeProfile<1, 4, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st3post : SDNode<"AArch64ISD::ST3post", SDTypeProfile<1, 5, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st4post : SDNode<"AArch64ISD::ST4post", SDTypeProfile<1, 6, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64ld1x2post : SDNode<"AArch64ISD::LD1x2post", SDTypeProfile<3, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld1x3post : SDNode<"AArch64ISD::LD1x3post", SDTypeProfile<4, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld1x4post : SDNode<"AArch64ISD::LD1x4post", SDTypeProfile<5, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64st1x2post : SDNode<"AArch64ISD::ST1x2post", SDTypeProfile<1, 4, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st1x3post : SDNode<"AArch64ISD::ST1x3post", SDTypeProfile<1, 5, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st1x4post : SDNode<"AArch64ISD::ST1x4post", SDTypeProfile<1, 6, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64ld1duppost : SDNode<"AArch64ISD::LD1DUPpost", SDTypeProfile<2, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld2duppost : SDNode<"AArch64ISD::LD2DUPpost", SDTypeProfile<3, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld3duppost: SDNode<"AArch64ISD::LD3DUPpost", SDTypeProfile<4, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld4duppost: SDNode<"AArch64ISD::LD4DUPpost", SDTypeProfile<5, 2, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld1lanepost: SDNode<"AArch64ISD::LD1LANEpost", SDTypeProfile<2, 4, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld2lanepost : SDNode<"AArch64ISD::LD2LANEpost", SDTypeProfile<3, 5, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld3lanepost: SDNode<"AArch64ISD::LD3LANEpost", SDTypeProfile<4, 6, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ld4lanepost: SDNode<"AArch64ISD::LD4LANEpost", SDTypeProfile<5, 7, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64st2lanepost : SDNode<"AArch64ISD::ST2LANEpost", SDTypeProfile<1, 5, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st3lanepost : SDNode<"AArch64ISD::ST3LANEpost", SDTypeProfile<1, 6, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st4lanepost : SDNode<"AArch64ISD::ST4LANEpost", SDTypeProfile<1, 7, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+// Non-temporal scatter store
+def AArch64sstnt1_index_pred: SDNode<"AArch64ISD::SSTNT1_INDEX_PRED", SDTypeProfile<0, 5, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+// Scatter store
+def AArch64sst1q_index_pred: SDNode<"AArch64ISD::SST1Q_INDEX_PRED", SDTypeProfile<0, 5, []>, [
+ SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+// Non-temporal gather loads
+def AArch64gldnt1_index_merge_zero: SDNode<"AArch64ISD::GLDNT1_INDEX_MERGE_ZERO", SDTypeProfile<1, 4, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+// Unsigned gather loads.
+def AArch64gld1q_index_merge_zero: SDNode<"AArch64ISD::GLD1Q_INDEX_MERGE_ZERO", SDTypeProfile<1, 4, []>, [
+ SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands
// have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
@@ -5700,14 +5773,14 @@ def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))),
(v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>;
// Patterns for funnel shifts to be matched to equivalent REV instructions
-def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))),
- (v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))),
+def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))),
+ (v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))),
(v2i64 (REV64v4i32 (v2i64 V128:$Rn)))>;
-def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))),
- (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))),
+def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))),
+ (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))),
(v4i32 (REV32v8i16 (v4i32 V128:$Rn)))>;
-def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))),
- (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))),
+def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))),
+ (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))),
(v2i32 (REV32v4i16 (v2i32 V64:$Rn)))>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 1c903765efc5a..1897545e53b1f 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -30,61 +30,6 @@ static cl::opt<bool>
AArch64SelectionDAGInfo::AArch64SelectionDAGInfo()
: SelectionDAGGenTargetInfo(AArch64GenSDNodeInfo) {}
-const char *AArch64SelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
-#define MAKE_CASE(V) \
- case V: \
- return #V;
-
- // These nodes don't have corresponding entries in *.td files yet.
- switch (static_cast<AArch64ISD::NodeType>(Opcode)) {
- MAKE_CASE(AArch64ISD::LD2post)
- MAKE_CASE(AArch64ISD::LD3post)
- MAKE_CASE(AArch64ISD::LD4post)
- MAKE_CASE(AArch64ISD::ST2post)
- MAKE_CASE(AArch64ISD::ST3post)
- MAKE_CASE(AArch64ISD::ST4post)
- MAKE_CASE(AArch64ISD::LD1x2post)
- MAKE_CASE(AArch64ISD::LD1x3post)
- MAKE_CASE(AArch64ISD::LD1x4post)
- MAKE_CASE(AArch64ISD::ST1x2post)
- MAKE_CASE(AArch64ISD::ST1x3post)
- MAKE_CASE(AArch64ISD::ST1x4post)
- MAKE_CASE(AArch64ISD::LD1DUPpost)
- MAKE_CASE(AArch64ISD::LD2DUPpost)
- MAKE_CASE(AArch64ISD::LD3DUPpost)
- MAKE_CASE(AArch64ISD::LD4DUPpost)
- MAKE_CASE(AArch64ISD::LD1LANEpost)
- MAKE_CASE(AArch64ISD::LD2LANEpost)
- MAKE_CASE(AArch64ISD::LD3LANEpost)
- MAKE_CASE(AArch64ISD::LD4LANEpost)
- MAKE_CASE(AArch64ISD::ST2LANEpost)
- MAKE_CASE(AArch64ISD::ST3LANEpost)
- MAKE_CASE(AArch64ISD::ST4LANEpost)
- MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLD1Q_INDEX_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
- MAKE_CASE(AArch64ISD::SST1Q_INDEX_PRED)
- MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
- MAKE_CASE(AArch64ISD::INDEX_VECTOR)
- MAKE_CASE(AArch64ISD::MRRS)
- MAKE_CASE(AArch64ISD::MSRR)
- }
-#undef MAKE_CASE
-
- return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
-}
-
-bool AArch64SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- // These nodes don't have corresponding entries in *.td files yet.
- if (Opcode >= AArch64ISD::FIRST_MEMORY_OPCODE &&
- Opcode <= AArch64ISD::LAST_MEMORY_OPCODE)
- return true;
-
- return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
-}
-
void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const {
#ifndef NDEBUG
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 4f84672053a3c..e11bf8183a35c 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -20,92 +20,11 @@
#include "AArch64GenSDNodeInfo.inc"
namespace llvm {
-namespace AArch64ISD {
-
-// For predicated nodes where the result is a vector, the operation is
-// controlled by a governing predicate and the inactive lanes are explicitly
-// defined with a value, please stick the following naming convention:
-//
-// _MERGE_OP<n> The result value is a vector with inactive lanes equal
-// to source operand OP<n>.
-//
-// _MERGE_ZERO The result value is a vector with inactive lanes
-// actively zeroed.
-//
-// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
-// to the last source operand which only purpose is being
-// a passthru value.
-//
-// For other cases where no explicit action is needed to set the inactive lanes,
-// or when the result is not a vector and it is needed or helpful to
-// distinguish a node from similar unpredicated nodes, use:
-//
-// _PRED
-//
-enum NodeType : unsigned {
- INDEX_VECTOR = GENERATED_OPCODE_END,
-
- // Structured loads.
- SVE_LD2_MERGE_ZERO,
- SVE_LD3_MERGE_ZERO,
- SVE_LD4_MERGE_ZERO,
-
- // Unsigned gather loads.
- GLD1Q_INDEX_MERGE_ZERO,
-
- // Non-temporal gather loads
- GLDNT1_INDEX_MERGE_ZERO,
-
- // Scatter store
- SST1Q_INDEX_PRED,
-
- // Non-temporal scatter store
- SSTNT1_INDEX_PRED,
-
- // 128-bit system register accesses
- // lo64, hi64, chain = MRRS(chain, sysregname)
- MRRS,
- // chain = MSRR(chain, sysregname, lo64, hi64)
- MSRR,
-
- // NEON Load/Store with post-increment base updates
- FIRST_MEMORY_OPCODE,
- LD2post = FIRST_MEMORY_OPCODE,
- LD3post,
- LD4post,
- ST2post,
- ST3post,
- ST4post,
- LD1x2post,
- LD1x3post,
- LD1x4post,
- ST1x2post,
- ST1x3post,
- ST1x4post,
- LD1DUPpost,
- LD2DUPpost,
- LD3DUPpost,
- LD4DUPpost,
- LD1LANEpost,
- LD2LANEpost,
- LD3LANEpost,
- LD4LANEpost,
- ST2LANEpost,
- ST3LANEpost,
- ST4LANEpost,
- LAST_MEMORY_OPCODE = ST4LANEpost,
-};
-
-} // namespace AArch64ISD
class AArch64SelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
AArch64SelectionDAGInfo();
- const char *getTargetNodeName(unsigned Opcode) const override;
-
- bool isTargetMemoryOpcode(unsigned Opcode) const override;
-
void verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const override;
@@ -143,6 +62,6 @@ class AArch64SelectionDAGInfo : public SelectionDAGGenTargetInfo {
SDValue Src, SDValue Size,
RTLIB::Libcall LC) const;
};
-}
+} // namespace llvm
#endif
>From 1fffd730ebf1ea331da0b84ec2e927fc08c54617 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Sun, 18 May 2025 18:14:05 +0000
Subject: [PATCH 3/4] Fix some SDNode issues
- Avoid extra undef parameters in rev lowerings
- Use consistent types for nodes
- Remove glue from descriptions of nodes that never use glue
---
.../Target/AArch64/AArch64ISelLowering.cpp | 49 ++++++++++---------
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 42 ++++++++--------
.../AArch64/AArch64SelectionDAGInfo.cpp | 28 ++---------
.../AArch64/sve-fixed-length-permute-rev.ll | 3 +-
...streaming-mode-fixed-length-permute-rev.ll | 4 +-
5 files changed, 53 insertions(+), 73 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index be39e7a849b82..258c8b0335759 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5705,13 +5705,13 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
case Intrinsic::aarch64_sme_za_enable:
return DAG.getNode(
- AArch64ISD::SMSTART, DL, MVT::Other,
+ AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Op->getOperand(0), // Chain
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
DAG.getConstant(AArch64SME::Always, DL, MVT::i64));
case Intrinsic::aarch64_sme_za_disable:
return DAG.getNode(
- AArch64ISD::SMSTOP, DL, MVT::Other,
+ AArch64ISD::SMSTOP, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Op->getOperand(0), // Chain
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
DAG.getConstant(AArch64SME::Always, DL, MVT::i64));
@@ -8203,8 +8203,8 @@ SDValue AArch64TargetLowering::LowerCallResult(
}
if (RequiresSMChange && isPassedInFPR(VA.getValVT()))
- Val = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL, Val.getValueType(),
- Val);
+ Val = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL,
+ DAG.getVTList(Val.getValueType(), MVT::Glue), Val);
InVals.push_back(Val);
}
@@ -8863,7 +8863,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (DisableZA)
Chain = DAG.getNode(
- AArch64ISD::SMSTOP, DL, MVT::Other, Chain,
+ AArch64ISD::SMSTOP, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain,
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
DAG.getConstant(AArch64SME::Always, DL, MVT::i64));
@@ -9034,7 +9034,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// smstart/smstop and the call by the simple register coalescer.
if (RequiresSMChange && isPassedInFPR(Arg.getValueType()))
Arg = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL,
- Arg.getValueType(), Arg);
+ DAG.getVTList(Arg.getValueType(), MVT::Glue), Arg);
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
@@ -9341,7 +9341,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (CallAttrs.requiresEnablingZAAfterCall())
// Unconditionally resume ZA.
Result = DAG.getNode(
- AArch64ISD::SMSTART, DL, MVT::Other, Result,
+ AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue), Result,
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
DAG.getConstant(AArch64SME::Always, DL, MVT::i64));
@@ -9512,8 +9512,10 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
for (auto &RetVal : RetVals) {
if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface() &&
isPassedInFPR(RetVal.second.getValueType()))
- RetVal.second = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL,
- RetVal.second.getValueType(), RetVal.second);
+ RetVal.second =
+ DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL,
+ DAG.getVTList(RetVal.second.getValueType(), MVT::Glue),
+ RetVal.second);
Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(
@@ -29440,11 +29442,18 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
return convertFromScalableVector(DAG, VT, Op);
}
+ auto lowerToRevMergePassthru = [&](unsigned Opcode, SDValue Vec, EVT NewVT) {
+ auto Pg = getPredicateForVector(DAG, DL, NewVT);
+ SDValue RevOp = DAG.getNode(ISD::BITCAST, DL, NewVT, Vec);
+ auto Rev =
+ DAG.getNode(Opcode, DL, NewVT, Pg, RevOp, DAG.getUNDEF(ContainerVT));
+ auto Cast = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Rev);
+ return convertFromScalableVector(DAG, VT, Cast);
+ };
+
unsigned EltSize = VT.getScalarSizeInBits();
for (unsigned LaneSize : {64U, 32U, 16U}) {
if (isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), LaneSize)) {
- EVT NewVT =
- getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
unsigned RevOp;
if (EltSize == 8)
RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
@@ -29452,24 +29461,16 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
else
RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
-
- Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
- Op = LowerToPredicatedOp(Op, DAG, RevOp);
- Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
- return convertFromScalableVector(DAG, VT, Op);
+ EVT NewVT =
+ getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
+ return lowerToRevMergePassthru(RevOp, Op1, NewVT);
}
}
if (Subtarget->hasSVE2p1() && EltSize == 64 &&
isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), 128)) {
- if (!VT.isFloatingPoint())
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
-
- EVT NewVT = getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), 64));
- Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
- Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
- Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
- return convertFromScalableVector(DAG, VT, Op);
+ return lowerToRevMergePassthru(AArch64ISD::REVD_MERGE_PASSTHRU, Op1,
+ ContainerVT);
}
unsigned WhichResult;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a4bcd6847c4f0..020051bbadea5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -32,16 +32,16 @@ def SDT_AArch64_LD1 : SDTypeProfile<1, 3, [
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def AArch64ld1_z : SDNode<"AArch64ISD::LD1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1s_z : SDNode<"AArch64ISD::LD1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1_z : SDNode<"AArch64ISD::LD1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_z : SDNode<"AArch64ISD::LD1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad]>;
// Non-faulting & first-faulting loads - node definitions
//
-def AArch64ldnf1_z : SDNode<"AArch64ISD::LDNF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_z : SDNode<"AArch64ISD::LDFF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldnf1_z : SDNode<"AArch64ISD::LDNF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1_z : SDNode<"AArch64ISD::LDFF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ldnf1s_z : SDNode<"AArch64ISD::LDNF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_z : SDNode<"AArch64ISD::LDFF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldnf1s_z : SDNode<"AArch64ISD::LDNF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1s_z : SDNode<"AArch64ISD::LDFF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad]>;
// Contiguous load and replicate - node definitions
//
@@ -82,21 +82,21 @@ def AArch64ld1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED_MER
def AArch64ld1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ld1s_gather_imm_z : SDNode<"AArch64ISD::GLD1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
-def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-
-def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+
+def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1s_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 1897545e53b1f..adf61e667c591 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -36,32 +36,10 @@ void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
switch (N->getOpcode()) {
default:
return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
- case AArch64ISD::GLDFF1S_IMM_MERGE_ZERO:
- case AArch64ISD::GLDFF1S_MERGE_ZERO:
- case AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO:
- case AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO:
- case AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO:
- case AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO:
- case AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO:
- case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
- case AArch64ISD::GLDFF1_MERGE_ZERO:
- case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
- case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
- case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
- case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
- case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
- case AArch64ISD::LDFF1S_MERGE_ZERO:
- case AArch64ISD::LDFF1_MERGE_ZERO:
- case AArch64ISD::LDNF1S_MERGE_ZERO:
- case AArch64ISD::LDNF1_MERGE_ZERO:
- // invalid number of results; expected 3, got 2
- case AArch64ISD::SMSTOP:
- case AArch64ISD::COALESCER_BARRIER:
- // invalid number of results; expected 2, got 1
case AArch64ISD::SMSTART:
- // variadic operand #3 must be Register or RegisterMask
- case AArch64ISD::REVD_MERGE_PASSTHRU:
- // invalid number of operands; expected 3, got 4
+ case AArch64ISD::SMSTOP:
+ // FIXME: These can't be verified by SelectionDAGGenTargetInfo as the
+ // variadic "PStateSM" operand is not a Register or RegisterMask.
return;
case AArch64ISD::SADDWT:
case AArch64ISD::SADDWB:
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
index 0cda4d94444e9..faf82d4945b3d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
@@ -213,8 +213,9 @@ define void @test_revdv4i64_sve2p1(ptr %a) #2 {
; CHECK-LABEL: test_revdv4i64_sve2p1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: revd z0.q, p0/m, z0.q
+; CHECK-NEXT: revd z0.q, p1/m, z0.q
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <4 x i64>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index c364abf2916e8..d8f83834a1bca 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -677,7 +677,7 @@ define void @test_revdv4i64_sve2p1(ptr %a) #1 {
; CHECK-LABEL: test_revdv4i64_sve2p1:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: revd z0.q, p0/m, z0.q
; CHECK-NEXT: revd z1.q, p0/m, z1.q
; CHECK-NEXT: stp q0, q1, [x0]
@@ -686,7 +686,7 @@ define void @test_revdv4i64_sve2p1(ptr %a) #1 {
; NONEON-NOSVE-LABEL: test_revdv4i64_sve2p1:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NONEON-NOSVE-NEXT: ptrue p0.d
; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q
; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
>From 81dc17d458e8d327b956b8b6d779e053f12bb1c6 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Sun, 18 May 2025 18:49:33 +0000
Subject: [PATCH 4/4] Import doc comments from C++
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 70 ++++++++++++++++++-
.../lib/Target/AArch64/AArch64SMEInstrInfo.td | 1 +
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 56 +++++++++++----
llvm/lib/Target/AArch64/SMEInstrFormats.td | 7 +-
4 files changed, 117 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 97fc0c4ae9615..ccf599b7d81ed 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -704,10 +704,15 @@ def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
}]>;
// Node definitions.
+// Compare-and-branch
def AArch64CB : SDNode<"AArch64ISD::CB", SDT_AArch64cb, [SDNPHasChain]>;
+// Page address of a TargetGlobalAddress operand.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
+// Add the low 12 bits of a TargetGlobalAddress operand.
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
+// Load from automatically generated descriptor (e.g. Global Offset Table, TLS
+// record).
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
SDCallSeqStart<[ SDTCisVT<0, i32>,
@@ -722,21 +727,27 @@ def AArch64call : SDNode<"AArch64ISD::CALL",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// Function call followed by a BTI instruction.
def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// Pseudo for an OBJC call that gets emitted together with a special `mov
+// x29, x29` marker instruction.
def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// A call with the callee in x16, i.e. "blr x16".
def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// Function call, authenticating the callee value first:
+// AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
SDTCisVT<1, i32>,
@@ -745,6 +756,8 @@ def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
+// operands.
def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN",
SDTypeProfile<0, 5, [SDTCisPtrTy<0>,
SDTCisVT<2, i32>,
@@ -752,6 +765,7 @@ def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN",
SDTCisVT<4, i64>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+// Authenticated variant of CALL_RVMARKER.
def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
SDTCisVT<1, i32>,
@@ -762,6 +776,7 @@ def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// Conditional branch instruction; "b.cond".
def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
[SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
@@ -775,13 +790,19 @@ def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
+// Conditional select invert.
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
+// Conditional select negate.
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
+// Conditional select increment.
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
+// Return with a glue operand. Operand 0 is the chain operand.
def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
+
+// Arithmetic instructions which write flags.
def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
[SDNPCommutative]>;
def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
@@ -790,15 +811,20 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
+// Conditional compares. Operands: left,right,falsecc,cc,flags
def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
+// Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
+// ELF.
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
+// Floating point comparison
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
let IsStrictFP = true in {
+ // Strict (exception-raising) floating point comparison
def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
[SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
@@ -809,6 +835,7 @@ def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
[(AArch64strict_fcmp node:$lhs, node:$rhs),
(AArch64fcmp node:$lhs, node:$rhs)]>;
+// Scalar-to-vector duplication
def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
@@ -818,6 +845,7 @@ def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;
def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
+// Vector shuffles
def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
@@ -825,6 +853,7 @@ def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
+// Vector immediate moves
def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
@@ -838,6 +867,9 @@ def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64Rev>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64Rev>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
+// Vector shift by scalar
+def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
+def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vashr_exact : PatFrag<(ops node:$lhs, node:$rhs),
@@ -845,16 +877,18 @@ def AArch64vashr_exact : PatFrag<(ops node:$lhs, node:$rhs),
return N->getFlags().hasExact();
}]>;
-def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
-def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
+// Vector shift by scalar (again)
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
+
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
+// Vector bitwise select: similar to ISD::VSELECT but not all bits within an
+// element must be identical.
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
def AArch64cmeq : PatFrag<(ops node:$lhs, node:$rhs),
@@ -868,6 +902,7 @@ def AArch64cmhi : PatFrag<(ops node:$lhs, node:$rhs),
def AArch64cmhs : PatFrag<(ops node:$lhs, node:$rhs),
(setcc node:$lhs, node:$rhs, SETUGE)>;
+// Vector comparisons
def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
@@ -902,6 +937,7 @@ def AArch64fcmlez : PatFrag<(ops node:$lhs),
def AArch64fcmltz : PatFrag<(ops node:$lhs),
(AArch64fcmgt immAllZerosV, node:$lhs)>;
+// Round wide FP to narrow FP with inexact results to odd.
def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),
[(f32 (int_aarch64_sisd_fcvtxn (f64 node:$Rn))),
@@ -912,18 +948,24 @@ def AArch64fcvtxnv: PatFrags<(ops node:$Rn),
//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
+// Vector immediate ops
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
+// Tail calls
def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+// Custom prefetch handling
def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
[SDNPHasChain, SDNPSideEffect]>;
+// {s|u}int to FP within a FP register.
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
+// Produces the full sequence of instructions for getting the thread pointer
+// offset of a variable into X0, using the TLSDesc model.
def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
SDT_AArch64TLSDescCallSeq,
[SDNPOutGlue, SDNPHasChain, SDNPVariadic]>;
@@ -935,6 +977,11 @@ def AArch64tlsdesc_auth_callseq : SDNode<"AArch64ISD::TLSDESC_AUTH_CALLSEQ",
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
+/// Natural vector cast. ISD::BITCAST is not natural in the big-endian
+/// world w.r.t vectors; which causes additional REV instructions to be
+/// generated to compensate for the byte-swapping. But sometimes we do
+/// need to re-interpret the data in SIMD vector registers in big-endian
+/// mode without emitting such REV instructions.
def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
@@ -946,21 +993,30 @@ def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
[SDNPCommutative]>;
+// Reciprocal estimates and steps.
def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;
+// udot/sdot/usdot instructions
def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;
def AArch64usdot : SDNode<"AArch64ISD::USDOT", SDT_AArch64Dot>;
+// Vector across-lanes addition
+// Only the lower result lane is defined.
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
+
+// Vector across-lanes min/max
+// Only the lower result lane is defined.
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+
+// Unsigned sum Long across Vector
def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64saddlv : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;
@@ -971,7 +1027,9 @@ def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
[(abds node:$lhs, node:$rhs),
(int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
+// Add Pairwise of two vectors
def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
+// Add Long Pairwise
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm),
@@ -1033,11 +1091,14 @@ def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDN
def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
+// To avoid stack clash, allocation is performed by block and each block is
+// probed.
def AArch64probedalloca
: SDNode<"AArch64ISD::PROBED_ALLOCA",
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPMayStore]>;
+// MRS, also sets the flags via a glue.
def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTypeProfile<2, 1, [SDTCisVT<0, i64>,
SDTCisVT<1, i32>,
@@ -1058,6 +1119,7 @@ def AArch64msrr : SDNode<"AArch64ISD::MSRR",
[SDNPHasChain]>;
def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
+// Vector narrowing shift by immediate (bottom)
def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
[(AArch64rshrnb node:$rs, node:$i),
@@ -1066,6 +1128,8 @@ def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
[SDTCisInt<0>, SDTCisVec<1>]>, []>;
+// NEON Load/Store with post-increment base updates.
+// TODO: Complete SDTypeProfile constraints.
def AArch64ld2post : SDNode<"AArch64ISD::LD2post", SDTypeProfile<3, 2, []>, [
SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ld3post : SDNode<"AArch64ISD::LD3post", SDTypeProfile<4, 2, []>, [
@@ -10344,6 +10408,8 @@ def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
[]>, Sched<[]>;
+// Asserts that a function argument (i32) is zero-extended to i8 by
+// the caller
def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
(i32 GPR32:$op)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 363ecee49c0f2..f34103af438e1 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -53,6 +53,7 @@ let usesCustomInserter = 1 in {
}
// Nodes to allocate a save buffer for SME.
+// Needed for __arm_agnostic("sme_za_state").
def AArch64SMESaveSize : SDNode<"AArch64ISD::GET_SME_SAVE_SIZE", SDTypeProfile<1, 0,
[SDTCisInt<0>]>, [SDNPHasChain]>;
let usesCustomInserter = 1, Defs = [X0] in {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 020051bbadea5..a40ef56f30486 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -10,16 +10,35 @@
//
//===----------------------------------------------------------------------===//
-// For predicated nodes where the entire operation is controlled by a governing
-// predicate, please stick to a similar naming convention as used for the
-// ISD nodes:
+// For predicated nodes where the result is a vector, the operation is
+// controlled by a governing predicate and the inactive lanes are explicitly
+// defined with a value, please stick the following naming convention for ISD
+// nodes:
//
-// SDNode <=> AArch64ISD
-// -------------------------------
-// _m<n> <=> _MERGE_OP<n>
-// _mt <=> _MERGE_PASSTHRU
-// _z <=> _MERGE_ZERO
-// _p <=> _PRED
+// _MERGE_OP<n> The result value is a vector with inactive lanes equal
+// to source operand OP<n>.
+//
+// _MERGE_ZERO The result value is a vector with inactive lanes
+// actively zeroed.
+//
+// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
+// to the last source operand whose only purpose is being
+// a passthru value.
+//
+// For other cases where no explicit action is needed to set the inactive lanes,
+// or when the result is not a vector and it is needed or helpful to
+// distinguish a node from similar unpredicated nodes, use:
+//
+// _PRED
+//
+// The TableGen definition names should be based on the ISD node's name:
+//
+// TableGen SDNode <=> AArch64ISD
+// --------------------------------------------
+// _m<n> <=> _MERGE_OP<n>
+// _mt <=> _MERGE_PASSTHRU
+// _z <=> _MERGE_ZERO
+// _p <=> _PRED
//
// Given the context of this file, it is not strictly necessary to use _p to
// distinguish predicated from unpredicated nodes given that most SVE
@@ -66,6 +85,7 @@ def SDT_AArch64_GATHER_VS : SDTypeProfile<1, 4, [
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
+// Unsigned gather loads.
def AArch64ld1_gather_z : SDNode<"AArch64ISD::GLD1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ld1_gather_scaled_z : SDNode<"AArch64ISD::GLD1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ld1_gather_uxtw_z : SDNode<"AArch64ISD::GLD1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
@@ -74,6 +94,7 @@ def AArch64ld1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1_UXTW_SCALED_MERGE
def AArch64ld1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ld1_gather_imm_z : SDNode<"AArch64ISD::GLD1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+// Signed gather loads.
def AArch64ld1s_gather_z : SDNode<"AArch64ISD::GLD1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ld1s_gather_scaled_z : SDNode<"AArch64ISD::GLD1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ld1s_gather_uxtw_z : SDNode<"AArch64ISD::GLD1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
@@ -82,6 +103,7 @@ def AArch64ld1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED_MER
def AArch64ld1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ld1s_gather_imm_z : SDNode<"AArch64ISD::GLD1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+// First-faulting unsigned gather loads.
def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldff1_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
@@ -90,6 +112,7 @@ def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_M
def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+// First-faulting signed gather loads.
def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
@@ -98,6 +121,7 @@ def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED
def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+// Non-temporal gather loads.
def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
// Gather vector base + scalar offset
@@ -110,6 +134,7 @@ def SDT_AArch64_ST1 : SDTypeProfile<0, 4, [
SDTCVecEltisVT<2,i1>, SDTCisSameNumEltsAs<0,2>
]>;
+// Contiguous masked store.
def AArch64st1 : SDNode<"AArch64ISD::ST1_PRED", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>;
// Scatter stores - node definitions
@@ -124,6 +149,7 @@ def SDT_AArch64_SCATTER_VS : SDTypeProfile<0, 5, [
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
+// Scatter store
def AArch64st1_scatter : SDNode<"AArch64ISD::SST1_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
@@ -132,6 +158,7 @@ def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED_PRED",
def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
+// Non-temporal scatter store
def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
// Scatter vector base + scalar offset
@@ -152,6 +179,11 @@ def sve_cntw_imm_neg : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, -4>">;
def sve_cntd_imm_neg : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, -2>">;
def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;
+def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3,
+ [SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisVec<3>, SDTCisSameNumEltsAs<1,3>]>;
+
+// Floating-point reductions.
+def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
def AArch64faddv_p : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>;
def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
@@ -361,11 +393,8 @@ def AArch64fcvtx_mt : SDNode<"AArch64ISD::FCVTX_MERGE_PASSTHRU", SDT_AArch64FCV
def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>;
def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>;
-def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3,
- [SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisVec<3>, SDTCisSameNumEltsAs<1,3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
-def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(AArch64fadda_p_node node:$op1, node:$op2, node:$op3),
@@ -390,6 +419,7 @@ def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED
def AArch64splice : SDNode<"AArch64ISD::SPLICE", SDT_AArch64Arith>;
+// Cast between vectors of the same element type but differ in length.
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
let HasOneUse = 1 in
@@ -430,10 +460,12 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
]>;
+// Unpredicated vector instructions
def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
def SDT_AArch64addw : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
+// Wide adds
def AArch64saddwt : SDNode<"AArch64ISD::SADDWT", SDT_AArch64addw>;
def AArch64saddwb : SDNode<"AArch64ISD::SADDWB", SDT_AArch64addw>;
def AArch64uaddwt : SDNode<"AArch64ISD::UADDWT", SDT_AArch64addw>;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 97e1da7df56b7..b3005d5120229 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -73,6 +73,7 @@ def FILL_PPR_FROM_ZPR_SLOT_PSEUDO :
}
def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
+// SME ZA loads and stores
def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
[SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore,
@@ -285,7 +286,7 @@ class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand
class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
- (!cast<Instruction>(name) $base, $offset)>;
+ (!cast<Instruction>(name) $base, $offset)>;
class SME2_Tile_Movaz_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, Operand tile_imm, Operand index_ty, ComplexPattern tileslice>
: Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))),
@@ -2337,7 +2338,7 @@ multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op, SDPat
multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
ValueType zpr_ty, SDPatternOperator intrinsic, list<Register> uses=[]> {
- def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
+ def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
let Uses = uses;
}
@@ -5437,7 +5438,7 @@ multiclass sme2p1_zero_matrix<string mnemonic> {
def : SME2_Zero_Matrix_Pat<NAME # _4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x1, uimm1s4range, tileslicerange1s4>;
def : SME2_Zero_Matrix_Pat<NAME # _VG2_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x2, uimm0s4range, tileslicerange0s4>;
def : SME2_Zero_Matrix_Pat<NAME # _VG4_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x4, uimm0s4range, tileslicerange0s4>;
-}
+}
//===----------------------------------------------------------------------===//
// SME2.1 lookup table expand two non-contiguous registers
More information about the llvm-commits
mailing list