[llvm] a5b22b7 - [AArch64][SVE] Add support for DestructiveBinary and DestructiveBinaryComm DestructiveInstTypes
Cameron McInally via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 13:20:08 PST 2020
Author: Cameron McInally
Date: 2020-02-21T15:19:54-06:00
New Revision: a5b22b768f5a6f34c8f41eea6a32880794c1690b
URL: https://github.com/llvm/llvm-project/commit/a5b22b768f5a6f34c8f41eea6a32880794c1690b
DIFF: https://github.com/llvm/llvm-project/commit/a5b22b768f5a6f34c8f41eea6a32880794c1690b.diff
LOG: [AArch64][SVE] Add support for DestructiveBinary and DestructiveBinaryComm DestructiveInstTypes
Add support for the DestructiveBinary and DestructiveBinaryComm DestructiveInstTypes, along with the lowering code to expand the new pseudos into the final MOVPRFX + instruction pairs.
Differential Revision: https://reviews.llvm.org/D73711
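As a quick illustration of the lowering (register assignments here are only for the example, mirroring the comments and tests in the patch), a zeroing pseudo whose destination clashes with the second source operand

    FSUB_ZPZZ_ZERO_B z0, p0, z1, z0

is expanded into a zeroing MOVPRFX plus the reversed instruction:

    movprfx z0.b, p0/z, z0.b
    fsubr   z0.b, p0/m, z0.b, z1.b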
Added:
llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
Modified:
llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/O0-pipeline.ll
llvm/test/CodeGen/AArch64/O3-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 496056ff24cd..050d9830e402 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -68,6 +68,8 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
+ bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
unsigned ExtendImm, unsigned ZeroReg,
@@ -344,6 +346,176 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
return true;
}
+/// \brief Expand Pseudos to Instructions with destructive operands.
+///
+/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
+/// or for fixing relaxed register allocation conditions to comply with
+/// the instructions register constraints. The latter case may be cheaper
+/// than setting the register constraints in the register allocator,
+/// since that will insert regular MOV instructions rather than MOVPRFX.
+///
+/// Example (after register allocation):
+///
+/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
+///
+/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
+/// * We cannot map directly to FSUB_ZPmZ_B because the register
+/// constraints of the instruction are not met.
+/// * Also the _ZERO specifies the false lanes need to be zeroed.
+///
+/// We first try to see if the destructive operand == result operand,
+/// if not, we try to swap the operands, e.g.
+///
+/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// But because FSUB_ZPmZ is not commutative, this is semantically
+///
+/// different, so we need a reverse instruction:
+///
+/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// Then we implement the zeroing of the false lanes of Z0 by adding
+/// a zeroing MOVPRFX instruction:
+///
+/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
+/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// Note that this can only be done for _ZERO or _UNDEF variants where
+/// we can guarantee the false lanes to be zeroed (by implementing this)
+/// or that they are undef (don't care / not used), otherwise the
+/// swapping of operands is illegal because the operation is not
+/// (or cannot be emulated to be) fully commutative.
+bool AArch64ExpandPseudo::expand_DestructiveOp(
+ MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
+ uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
+ uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
+ bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+
+ if (DType == AArch64::DestructiveBinary)
+ assert(DstReg != MI.getOperand(3).getReg());
+
+ bool UseRev = false;
+ unsigned PredIdx, DOPIdx, SrcIdx;
+ switch (DType) {
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ if (DstReg == MI.getOperand(3).getReg()) {
+ // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
+ std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
+ UseRev = true;
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case AArch64::DestructiveBinary:
+ std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
+ break;
+ default:
+ llvm_unreachable("Unsupported Destructive Operand type");
+ }
+
+#ifndef NDEBUG
+ // MOVPRFX can only be used if the destination operand
+ // is the destructive operand, not as any other operand,
+ // so the Destructive Operand must be unique.
+ bool DOPRegIsUnique = false;
+ switch (DType) {
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ DOPRegIsUnique =
+ DstReg != MI.getOperand(DOPIdx).getReg() ||
+ MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
+ break;
+ }
+
+ assert (DOPRegIsUnique && "The destructive operand should be unique");
+#endif
+
+ // Resolve the reverse opcode
+ if (UseRev) {
+ if (AArch64::getSVERevInstr(Opcode) != -1)
+ Opcode = AArch64::getSVERevInstr(Opcode);
+ else if (AArch64::getSVEOrigInstr(Opcode) != -1)
+ Opcode = AArch64::getSVEOrigInstr(Opcode);
+ }
+
+ // Get the right MOVPRFX
+ uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
+ unsigned MovPrfx, MovPrfxZero;
+ switch (ElementSize) {
+ case AArch64::ElementSizeNone:
+ case AArch64::ElementSizeB:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
+ break;
+ case AArch64::ElementSizeH:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
+ break;
+ case AArch64::ElementSizeS:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
+ break;
+ case AArch64::ElementSizeD:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
+ break;
+ default:
+ llvm_unreachable("Unsupported ElementSize");
+ }
+
+ //
+ // Create the destructive operation (if required)
+ //
+ MachineInstrBuilder PRFX, DOP;
+ if (FalseZero) {
+ assert(ElementSize != AArch64::ElementSizeNone &&
+ "This instruction is unpredicated");
+
+ // Merge source operand into destination register
+ PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
+ .addReg(DstReg, RegState::Define)
+ .addReg(MI.getOperand(PredIdx).getReg())
+ .addReg(MI.getOperand(DOPIdx).getReg());
+
+ // After the movprfx, the destructive operand is same as Dst
+ DOPIdx = 0;
+ } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
+ PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
+ .addReg(DstReg, RegState::Define)
+ .addReg(MI.getOperand(DOPIdx).getReg());
+ DOPIdx = 0;
+ }
+
+ //
+ // Create the destructive operation
+ //
+ DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+
+ switch (DType) {
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ DOP.add(MI.getOperand(PredIdx))
+ .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+ .add(MI.getOperand(SrcIdx));
+ break;
+ }
+
+ if (PRFX) {
+ finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
+ transferImpOps(MI, PRFX, DOP);
+ } else
+ transferImpOps(MI, DOP, DOP);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64ExpandPseudo::expandSetTagLoop(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
@@ -425,6 +597,17 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
+
+ // Check if we can expand the destructive op
+ int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
+ if (OrigInstr != -1) {
+ auto &Orig = TII->get(OrigInstr);
+ if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
+ != AArch64::NotDestructive) {
+ return expand_DestructiveOp(MI, MBB, MBBI);
+ }
+ }
+
switch (Opcode) {
default:
break;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 9c6703129eb0..063a654ef869 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -162,6 +162,25 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
+ bool SelectDupZero(SDValue N) {
+ switch(N->getOpcode()) {
+ case AArch64ISD::DUP:
+ case ISD::SPLAT_VECTOR: {
+ auto Opnd0 = N->getOperand(0);
+ if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
+ if (CN->isNullValue())
+ return true;
+ if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
+ if (CN->isZero())
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+ }
+
template<MVT::SimpleValueType VT>
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
return SelectSVEAddSubImm(N, VT, Imm, Shift);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6914b7c7d40e..20a7cf6b1d57 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -22,13 +22,27 @@ def NormalFrm : Format<1>; // Do we need any others?
// Enum describing whether an instruction is
// destructive in its first source operand.
-class DestructiveInstTypeEnum<bits<1> val> {
- bits<1> Value = val;
+class DestructiveInstTypeEnum<bits<4> val> {
+ bits<4> Value = val;
}
-def NotDestructive : DestructiveInstTypeEnum<0>;
+def NotDestructive : DestructiveInstTypeEnum<0>;
// Destructive in its first operand and can be MOVPRFX'd, but has no other
// special properties.
-def DestructiveOther : DestructiveInstTypeEnum<1>;
+def DestructiveOther : DestructiveInstTypeEnum<1>;
+def DestructiveUnary : DestructiveInstTypeEnum<2>;
+def DestructiveBinaryImm : DestructiveInstTypeEnum<3>;
+def DestructiveBinaryShImmUnpred : DestructiveInstTypeEnum<4>;
+def DestructiveBinary : DestructiveInstTypeEnum<5>;
+def DestructiveBinaryComm : DestructiveInstTypeEnum<6>;
+def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>;
+def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>;
+
+class FalseLanesEnum<bits<2> val> {
+ bits<2> Value = val;
+}
+def FalseLanesNone : FalseLanesEnum<0>;
+def FalseLanesZero : FalseLanesEnum<1>;
+def FalseLanesUndef : FalseLanesEnum<2>;
// AArch64 Instruction Format
class AArch64Inst<Format f, string cstr> : Instruction {
@@ -46,10 +60,12 @@ class AArch64Inst<Format f, string cstr> : Instruction {
bits<2> Form = F.Value;
// Defaults
+ FalseLanesEnum FalseLanes = FalseLanesNone;
DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
ElementSizeEnum ElementSize = ElementSizeNone;
- let TSFlags{3} = DestructiveInstType.Value;
+ let TSFlags{8-7} = FalseLanes.Value;
+ let TSFlags{6-3} = DestructiveInstType.Value;
let TSFlags{2-0} = ElementSize.Value;
let Pattern = [];
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 9933c64dc873..f121114cf3ca 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -119,11 +119,25 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
+ case TargetOpcode::BUNDLE:
+ NumBytes = getInstBundleLength(MI);
+ break;
}
return NumBytes;
}
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI.getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
@@ -6680,5 +6694,10 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
+uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::ElementSizeMask;
+}
+
#define GET_INSTRINFO_HELPERS
+#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 298467e9bda3..ee425f47a5ee 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -271,6 +271,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
+ /// Returns the vector element size (B, H, S or D) of an SVE opcode.
+ uint64_t getElementSizeForOpcode(unsigned Opc) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI);
@@ -295,6 +297,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
isCopyInstrImpl(const MachineInstr &MI) const override;
private:
+ unsigned getInstBundleLength(const MachineInstr &MI) const;
+
/// Sets the offsets on outlined instructions in \p MBB which use SP
/// so that they will be valid post-outlining.
///
@@ -381,7 +385,8 @@ static inline bool isIndirectBranchOpcode(int Opc) {
// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
-#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit
+#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit
+#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits
// }
namespace AArch64 {
@@ -396,13 +401,31 @@ enum ElementSizeType {
};
enum DestructiveInstType {
- DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
- NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
- DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+ DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
+ NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
+ DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+ DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
+ DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
+ DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
+ DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
+ DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
+ DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
+ DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
+};
+
+enum FalseLaneType {
+ FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
+ FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
+ FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};
#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
+#undef TSFLAG_FALSE_LANE_TYPE
+
+int getSVEPseudoMap(uint16_t Opcode);
+int getSVERevInstr(uint16_t Opcode);
+int getSVEOrigInstr(uint16_t Opcode);
}
} // end namespace llvm
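For reference, a minimal C++ sketch (not part of this patch; the helper name is hypothetical) of how the widened TSFlags fields declared above can be queried for a given opcode:

    #include "AArch64InstrInfo.h"
    using namespace llvm;

    // Hypothetical helper: true if Opc is a commutative (or reversible)
    // destructive binary op whose pseudo zeroes the false lanes.
    static bool isZeroingDestructiveBinary(const AArch64InstrInfo &TII,
                                           unsigned Opc) {
      uint64_t TSFlags = TII.get(Opc).TSFlags;
      uint64_t DType = TSFlags & AArch64::DestructiveInstTypeMask;
      uint64_t FalseLanes = TSFlags & AArch64::FalseLanesMask;
      return (DType == AArch64::DestructiveBinaryComm ||
              DType == AArch64::DestructiveBinaryCommWithRev) &&
             FalseLanes == AArch64::FalseLanesZero;
    }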
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 61e48a75f8f2..44ec1de51463 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -239,19 +239,32 @@ let Predicates = [HasSVE] in {
defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
- defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", int_aarch64_sve_fadd>;
- defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", int_aarch64_sve_fsub>;
- defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", int_aarch64_sve_fmul>;
- defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", int_aarch64_sve_fsubr>;
- defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", int_aarch64_sve_fmaxnm>;
- defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", int_aarch64_sve_fminnm>;
- defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", int_aarch64_sve_fmax>;
- defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", int_aarch64_sve_fmin>;
- defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", int_aarch64_sve_fabd>;
+ defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
+ defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ", 1>;
+ defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>;
+ defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", 0>;
+ defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
+ defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
+ defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>;
+ defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>;
+ defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>;
defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>;
- defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", int_aarch64_sve_fmulx>;
- defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", int_aarch64_sve_fdivr>;
- defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", int_aarch64_sve_fdiv>;
+ defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>;
+ defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", 0>;
+ defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ", 1>;
+
+ defm FADD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fadd>;
+ defm FSUB_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsub>;
+ defm FMUL_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmul>;
+ defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fminnm>;
+ defm FMAX_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmax>;
+ defm FMIN_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmin>;
+ defm FABD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fabd>;
+ defm FMULX_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmulx>;
+ defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdivr>;
+ defm FDIV_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdiv>;
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index bc7b4384662a..a5676d286ebe 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -644,4 +644,7 @@ void AArch64PassConfig::addPreEmitPass() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
+
+ // SVE bundles move prefixes with destructive operations.
+ addPass(createUnpackMachineBundles(nullptr));
}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7cb638356c4a..56c7f2f5c9ae 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -367,8 +367,16 @@ class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
(inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
+def SVEDup0 : ComplexPattern<i64, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
+let AddedComplexity = 1 in {
+class SVE_3_Op_Pat_SelZero<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst>
+: Pat<(vtd (vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), vt3:$Op3))),
+ (inst $Op1, $Op2, $Op3)>;
+}
+
//
// Common but less generic patterns.
//
@@ -378,6 +386,55 @@ class SVE_1_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
(inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;
+//
+// Pseudo -> Instruction mappings
+//
+def getSVEPseudoMap : InstrMapping {
+ let FilterClass = "SVEPseudo2Instr";
+ let RowFields = ["PseudoName"];
+ let ColFields = ["IsInstr"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+class SVEPseudo2Instr<string name, bit instr> {
+ string PseudoName = name;
+ bit IsInstr = instr;
+}
+
+def getSVERevInstr : InstrMapping {
+ let FilterClass = "SVEInstr2Rev";
+ let RowFields = ["InstrName"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+def getSVEOrigInstr : InstrMapping {
+ let FilterClass = "SVEInstr2Rev";
+ let RowFields = ["InstrName"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+class SVEInstr2Rev<string name, string revname, bit nameIsOrig> {
+ string InstrName = !if(nameIsOrig, name, revname);
+ bit IsOrig = nameIsOrig;
+}
+
+//
+// Pseudos for destructive operands
+//
+let hasNoSchedulingInfo = 1 in {
+ class PredTwoOpPseudo<string name, ZPRRegOp zprty,
+ FalseLanesEnum flags = FalseLanesNone>
+ : SVEPseudo2Instr<name, 0>,
+ Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2), []> {
+ let FalseLanes = flags;
+ }
+}
+
//===----------------------------------------------------------------------===//
// SVE Predicate Misc Group
//===----------------------------------------------------------------------===//
@@ -1427,11 +1484,17 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_2op_p_zds<bits<4> opc, string asm,
- SDPatternOperator op> {
- def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
- def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
- def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
+multiclass sve_fp_2op_p_zds<bits<4> opc, string asm, string Ps,
+ SDPatternOperator op, DestructiveInstTypeEnum flags,
+ string revname="", bit isOrig=0> {
+ let DestructiveInstType = flags in {
+ def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isOrig>;
+ def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isOrig>;
+ def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isOrig>;
+ }
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
@@ -1449,6 +1512,16 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve_fp_2op_p_zds_zx<SDPatternOperator op> {
+ def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
+ def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
+ def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
+
+ def : SVE_3_Op_Pat_SelZero<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _ZERO_H)>;
+ def : SVE_3_Op_Pat_SelZero<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _ZERO_S)>;
+ def : SVE_3_Op_Pat_SelZero<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index 9b7e2f6dd288..f9613d40d6bd 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -68,6 +68,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: AArch64 Branch Targets
; CHECK-NEXT: Branch relaxation pass
+; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index bf489cbbae65..65b88ee0aa99 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -178,6 +178,7 @@
; CHECK-NEXT: AArch64 Branch Targets
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: AArch64 Compress Jump Tables
+; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
new file mode 100644
index 000000000000..bda892f61de2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
@@ -0,0 +1,261 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve < %s | FileCheck %s
+
+;
+; FADD
+;
+
+define <vscale x 4 x float> @fadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fadd_s:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a_z,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fadd_d:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a_z,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMAX
+;
+
+define <vscale x 4 x float> @fmax_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmax_s:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a_z,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmax_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmax_d:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a_z,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMAXNM
+;
+
+define <vscale x 4 x float> @fmaxnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmaxnm_s:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a_z,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmaxnm_d:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a_z,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMIN
+;
+
+define <vscale x 4 x float> @fmin_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmin_s:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a_z,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmin_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmin_d:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a_z,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMINNM
+;
+
+define <vscale x 4 x float> @fminnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fminnm_s:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a_z,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fminnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fminnm_d:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a_z,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMUL
+;
+
+define <vscale x 4 x float> @fmul_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmul_s:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a_z,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmul_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmul_d:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a_z,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FSUB
+;
+
+define <vscale x 4 x float> @fsub_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fsub_s:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a_z,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsub_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fsub_d:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a_z,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FSUBR
+;
+
+define <vscale x 4 x float> @fsubr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fsubr_s:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a_z,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsubr_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fsubr_d:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a_z,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)