[llvm] b564036 - [MachineCombiner][NFC] Split target-dependent patterns
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 10 21:20:31 PDT 2024
Author: Pengcheng Wang
Date: 2024-04-11T12:20:27+08:00
New Revision: b5640369337e98e573c949080ed4a4061ec6ec9a
URL: https://github.com/llvm/llvm-project/commit/b5640369337e98e573c949080ed4a4061ec6ec9a
DIFF: https://github.com/llvm/llvm-project/commit/b5640369337e98e573c949080ed4a4061ec6ec9a.diff
LOG: [MachineCombiner][NFC] Split target-dependent patterns
We split the target-dependent MachineCombiner patterns out of the generic
enum and move them into their respective target folders.
This makes MachineCombiner much more target-independent.
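As a rough illustration (not part of this commit), a target can now keep its
own pattern enum starting at the generic TARGET_PATTERN_START sentinel and
override the unsigned-based TargetInstrInfo hooks. The names below
(MyTargetMachineCombinerPattern, MyTargetInstrInfo, FMADD_AX, etc.) are
hypothetical; only the hook signatures and CombinerObjective come from this
patch:

  // Minimal sketch for a fictional "MyTarget" backend.
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/MachineCombinerPattern.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"

  namespace llvm {

  // Target-specific patterns now live in the target, numbered after the
  // generic reassociation patterns.
  enum MyTargetMachineCombinerPattern : unsigned {
    FMADD_AX = MachineCombinerPattern::TARGET_PATTERN_START,
    FMADD_XA,
    FMSUB,
  };

  class MyTargetInstrInfo : public TargetInstrInfo {
  public:
    bool getMachineCombinerPatterns(MachineInstr &Root,
                                    SmallVectorImpl<unsigned> &Patterns,
                                    bool DoRegPressureReduce) const override {
      bool Found = false;
      // Placeholder match: a real target inspects Root's opcode and operands
      // before pushing one of its own patterns.
      Patterns.push_back(MyTargetMachineCombinerPattern::FMADD_AX);
      Found = true;
      // Also collect the generic reassociation patterns.
      Found |= TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                           DoRegPressureReduce);
      return Found;
    }

    // Tell the generic pass what a target pattern is trying to achieve;
    // unknown patterns fall back to the default objective.
    CombinerObjective getCombinerObjective(unsigned Pattern) const override {
      switch (Pattern) {
      case MyTargetMachineCombinerPattern::FMADD_AX:
      case MyTargetMachineCombinerPattern::FMADD_XA:
      case MyTargetMachineCombinerPattern::FMSUB:
        return CombinerObjective::MustReduceDepth;
      default:
        return TargetInstrInfo::getCombinerObjective(Pattern);
      }
    }
  };

  } // end namespace llvm

Because the generic pass only sees plain unsigned pattern IDs and queries the
target for the CombinerObjective, a backend can add patterns without touching
the shared MachineCombinerPattern.h header.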
Reviewers:
davemgreen, asavonic, rotateright, RKSimon, lukel97, LuoYuanke, topperc, mshockwave, asi-sc
Reviewed By: topperc, mshockwave
Pull Request: https://github.com/llvm/llvm-project/pull/87991
Added:
Modified:
llvm/include/llvm/CodeGen/MachineCombinerPattern.h
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/MachineCombiner.cpp
llvm/lib/CodeGen/TargetInstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.h
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/lib/Target/X86/X86InstrInfo.h
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 41b73eaae0298c..3428c4dde5c7fc 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -16,8 +16,16 @@
namespace llvm {
+/// The combiner's goal may differ based on which pattern it is attempting
+/// to optimize.
+enum class CombinerObjective {
+ MustReduceDepth, // The data dependency chain must be improved.
+ MustReduceRegisterPressure, // The register pressure must be reduced.
+ Default // The critical path must not be lengthened.
+};
+
/// These are instruction patterns matched by the machine combiner pass.
-enum class MachineCombinerPattern {
+enum MachineCombinerPattern : unsigned {
// These are commutative variants for reassociating a computation chain. See
// the comments before getMachineCombinerPatterns() in TargetInstrInfo.cpp.
REASSOC_AX_BY,
@@ -25,163 +33,7 @@ enum class MachineCombinerPattern {
REASSOC_XA_BY,
REASSOC_XA_YB,
- // These are patterns matched by the PowerPC to reassociate FMA chains.
- REASSOC_XY_AMM_BMM,
- REASSOC_XMM_AMM_BMM,
-
- // These are patterns matched by the PowerPC to reassociate FMA and FSUB to
- // reduce register pressure.
- REASSOC_XY_BCA,
- REASSOC_XY_BAC,
-
- // These are patterns used to reduce the length of dependence chain.
- SUBADD_OP1,
- SUBADD_OP2,
-
- // These are multiply-add patterns matched by the AArch64 machine combiner.
- MULADDW_OP1,
- MULADDW_OP2,
- MULSUBW_OP1,
- MULSUBW_OP2,
- MULADDWI_OP1,
- MULSUBWI_OP1,
- MULADDX_OP1,
- MULADDX_OP2,
- MULSUBX_OP1,
- MULSUBX_OP2,
- MULADDXI_OP1,
- MULSUBXI_OP1,
- // NEON integers vectors
- MULADDv8i8_OP1,
- MULADDv8i8_OP2,
- MULADDv16i8_OP1,
- MULADDv16i8_OP2,
- MULADDv4i16_OP1,
- MULADDv4i16_OP2,
- MULADDv8i16_OP1,
- MULADDv8i16_OP2,
- MULADDv2i32_OP1,
- MULADDv2i32_OP2,
- MULADDv4i32_OP1,
- MULADDv4i32_OP2,
-
- MULSUBv8i8_OP1,
- MULSUBv8i8_OP2,
- MULSUBv16i8_OP1,
- MULSUBv16i8_OP2,
- MULSUBv4i16_OP1,
- MULSUBv4i16_OP2,
- MULSUBv8i16_OP1,
- MULSUBv8i16_OP2,
- MULSUBv2i32_OP1,
- MULSUBv2i32_OP2,
- MULSUBv4i32_OP1,
- MULSUBv4i32_OP2,
-
- MULADDv4i16_indexed_OP1,
- MULADDv4i16_indexed_OP2,
- MULADDv8i16_indexed_OP1,
- MULADDv8i16_indexed_OP2,
- MULADDv2i32_indexed_OP1,
- MULADDv2i32_indexed_OP2,
- MULADDv4i32_indexed_OP1,
- MULADDv4i32_indexed_OP2,
-
- MULSUBv4i16_indexed_OP1,
- MULSUBv4i16_indexed_OP2,
- MULSUBv8i16_indexed_OP1,
- MULSUBv8i16_indexed_OP2,
- MULSUBv2i32_indexed_OP1,
- MULSUBv2i32_indexed_OP2,
- MULSUBv4i32_indexed_OP1,
- MULSUBv4i32_indexed_OP2,
-
- // Floating Point
- FMULADDH_OP1,
- FMULADDH_OP2,
- FMULSUBH_OP1,
- FMULSUBH_OP2,
- FMULADDS_OP1,
- FMULADDS_OP2,
- FMULSUBS_OP1,
- FMULSUBS_OP2,
- FMULADDD_OP1,
- FMULADDD_OP2,
- FMULSUBD_OP1,
- FMULSUBD_OP2,
- FNMULSUBH_OP1,
- FNMULSUBS_OP1,
- FNMULSUBD_OP1,
- FMLAv1i32_indexed_OP1,
- FMLAv1i32_indexed_OP2,
- FMLAv1i64_indexed_OP1,
- FMLAv1i64_indexed_OP2,
- FMLAv4f16_OP1,
- FMLAv4f16_OP2,
- FMLAv8f16_OP1,
- FMLAv8f16_OP2,
- FMLAv2f32_OP2,
- FMLAv2f32_OP1,
- FMLAv2f64_OP1,
- FMLAv2f64_OP2,
- FMLAv4i16_indexed_OP1,
- FMLAv4i16_indexed_OP2,
- FMLAv8i16_indexed_OP1,
- FMLAv8i16_indexed_OP2,
- FMLAv2i32_indexed_OP1,
- FMLAv2i32_indexed_OP2,
- FMLAv2i64_indexed_OP1,
- FMLAv2i64_indexed_OP2,
- FMLAv4f32_OP1,
- FMLAv4f32_OP2,
- FMLAv4i32_indexed_OP1,
- FMLAv4i32_indexed_OP2,
- FMLSv1i32_indexed_OP2,
- FMLSv1i64_indexed_OP2,
- FMLSv4f16_OP1,
- FMLSv4f16_OP2,
- FMLSv8f16_OP1,
- FMLSv8f16_OP2,
- FMLSv2f32_OP1,
- FMLSv2f32_OP2,
- FMLSv2f64_OP1,
- FMLSv2f64_OP2,
- FMLSv4i16_indexed_OP1,
- FMLSv4i16_indexed_OP2,
- FMLSv8i16_indexed_OP1,
- FMLSv8i16_indexed_OP2,
- FMLSv2i32_indexed_OP1,
- FMLSv2i32_indexed_OP2,
- FMLSv2i64_indexed_OP1,
- FMLSv2i64_indexed_OP2,
- FMLSv4f32_OP1,
- FMLSv4f32_OP2,
- FMLSv4i32_indexed_OP1,
- FMLSv4i32_indexed_OP2,
-
- FMULv2i32_indexed_OP1,
- FMULv2i32_indexed_OP2,
- FMULv2i64_indexed_OP1,
- FMULv2i64_indexed_OP2,
- FMULv4i16_indexed_OP1,
- FMULv4i16_indexed_OP2,
- FMULv4i32_indexed_OP1,
- FMULv4i32_indexed_OP2,
- FMULv8i16_indexed_OP1,
- FMULv8i16_indexed_OP2,
-
- // RISCV FMADD, FMSUB, FNMSUB patterns
- FMADD_AX,
- FMADD_XA,
- FMSUB,
- FNMSUB,
- SHXADD_ADD_SLLI_OP1,
- SHXADD_ADD_SLLI_OP2,
-
- // X86 VNNI
- DPWSSD,
-
- FNMADD,
+ TARGET_PATTERN_START
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 9fd0ebe6956fbe..d4a83e3753d980 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/Uniformity.h"
#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -61,7 +62,6 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class TargetSchedModel;
class TargetSubtargetInfo;
-enum class MachineCombinerPattern;
enum class MachineTraceStrategy;
template <class T> class SmallVectorImpl;
@@ -1191,10 +1191,9 @@ class TargetInstrInfo : public MCInstrInfo {
/// faster sequence.
/// \param Root - Instruction that could be combined with one of its operands
/// \param Patterns - Vector of possible combination patterns
- virtual bool
- getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns,
- bool DoRegPressureReduce) const;
+ virtual bool getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns,
+ bool DoRegPressureReduce) const;
/// Return true if target supports reassociation of instructions in machine
/// combiner pass to reduce register pressure for a given BB.
@@ -1206,13 +1205,17 @@ class TargetInstrInfo : public MCInstrInfo {
/// Fix up the placeholder we may add in genAlternativeCodeSequence().
virtual void
- finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
+ finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs) const {}
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
- virtual bool isThroughputPattern(MachineCombinerPattern Pattern) const;
+ virtual bool isThroughputPattern(unsigned Pattern) const;
+
+ /// Return the objective of a combiner pattern.
+ /// \param Pattern - combiner pattern
+ virtual CombinerObjective getCombinerObjective(unsigned Pattern) const;
/// Return true if the input \P Inst is part of a chain of dependent ops
/// that are suitable for reassociation, otherwise return false.
@@ -1256,7 +1259,7 @@ class TargetInstrInfo : public MCInstrInfo {
/// \param InstIdxForVirtReg - map of virtual register to instruction in
/// InsInstr that defines it
virtual void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const;
@@ -1270,8 +1273,7 @@ class TargetInstrInfo : public MCInstrInfo {
/// Attempt to reassociate \P Root and \P Prev according to \P Pattern to
/// reduce critical path length.
- void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
- MachineCombinerPattern Pattern,
+ void reassociateOps(MachineInstr &Root, MachineInstr &Prev, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
@@ -1281,8 +1283,7 @@ class TargetInstrInfo : public MCInstrInfo {
/// (new root opcode, new prev opcode) that must be used to reassociate \P
/// Root and \P Prev accoring to \P Pattern.
std::pair<unsigned, unsigned>
- getReassociationOpcodes(MachineCombinerPattern Pattern,
- const MachineInstr &Root,
+ getReassociationOpcodes(unsigned Pattern, const MachineInstr &Root,
const MachineInstr &Prev) const;
/// The limit on resource length extension we accept in MachineCombiner Pass.
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index a4c87a7678bd8d..ac58162bbfb420 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -99,17 +99,16 @@ class MachineCombiner : public MachineFunctionPass {
const MachineBasicBlock &MBB);
unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
MachineTraceMetrics::Trace BlockTrace);
- bool
- improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
- MachineTraceMetrics::Trace BlockTrace,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern, bool SlackIsAccurate);
+ bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ unsigned Pattern, bool SlackIsAccurate);
bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
- MachineCombinerPattern Pattern);
+ unsigned Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -123,7 +122,8 @@ class MachineCombiner : public MachineFunctionPass {
MachineTraceMetrics::Trace BlockTrace);
void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
- SmallVector<MachineCombinerPattern, 16> &Patterns);
+ SmallVector<unsigned, 16> &Patterns);
+ CombinerObjective getCombinerObjective(unsigned Pattern);
};
}
@@ -290,36 +290,17 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
return NewRootLatency;
}
-/// The combiner's goal may differ based on which pattern it is attempting
-/// to optimize.
-enum class CombinerObjective {
- MustReduceDepth, // The data dependency chain must be improved.
- MustReduceRegisterPressure, // The register pressure must be reduced.
- Default // The critical path must not be lengthened.
-};
-
-static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
+CombinerObjective MachineCombiner::getCombinerObjective(unsigned Pattern) {
// TODO: If C++ ever gets a real enum class, make this part of the
// MachineCombinerPattern class.
- switch (P) {
+ switch (Pattern) {
case MachineCombinerPattern::REASSOC_AX_BY:
case MachineCombinerPattern::REASSOC_AX_YB:
case MachineCombinerPattern::REASSOC_XA_BY:
case MachineCombinerPattern::REASSOC_XA_YB:
- case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
- case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
- case MachineCombinerPattern::SUBADD_OP1:
- case MachineCombinerPattern::SUBADD_OP2:
- case MachineCombinerPattern::FMADD_AX:
- case MachineCombinerPattern::FMADD_XA:
- case MachineCombinerPattern::FMSUB:
- case MachineCombinerPattern::FNMSUB:
return CombinerObjective::MustReduceDepth;
- case MachineCombinerPattern::REASSOC_XY_BCA:
- case MachineCombinerPattern::REASSOC_XY_BAC:
- return CombinerObjective::MustReduceRegisterPressure;
default:
- return CombinerObjective::Default;
+ return TII->getCombinerObjective(Pattern);
}
}
@@ -349,8 +330,7 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
bool MachineCombiner::reduceRegisterPressure(
MachineInstr &Root, MachineBasicBlock *MBB,
SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- MachineCombinerPattern Pattern) {
+ SmallVectorImpl<MachineInstr *> &DelInstrs, unsigned Pattern) {
// FIXME: for now, we don't do any check for the register pressure patterns.
// We treat them as always profitable. But we can do better if we make
// RegPressureTracker class be aware of TIE attribute. Then we can get an
@@ -368,8 +348,7 @@ bool MachineCombiner::improvesCriticalPathLen(
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned Pattern,
bool SlackIsAccurate) {
// Get depth and latency of NewRoot and Root.
unsigned NewRootDepth =
@@ -493,13 +472,14 @@ bool MachineCombiner::preservesResourceLen(
/// \param Pattern is used to call target hook finalizeInsInstrs
/// \param IncrementalUpdate if true, compute instruction depths incrementally,
/// otherwise invalidate the trace
-static void insertDeleteInstructions(
- MachineBasicBlock *MBB, MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- MachineTraceMetrics::Ensemble *TraceEnsemble,
- SparseSet<LiveRegUnit> &RegUnits, const TargetInstrInfo *TII,
- MachineCombinerPattern Pattern, bool IncrementalUpdate) {
+static void
+insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Ensemble *TraceEnsemble,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetInstrInfo *TII, unsigned Pattern,
+ bool IncrementalUpdate) {
// If we want to fix up some placeholder for some target, do it now.
// We need this because in genAlternativeCodeSequence, we have not decided the
// better pattern InsInstrs or DelInstrs, so we don't want generate some
@@ -534,9 +514,9 @@ static void insertDeleteInstructions(
// Check that the difference between original and new latency is decreasing for
// later patterns. This helps to discover sub-optimal pattern orderings.
-void MachineCombiner::verifyPatternOrder(
- MachineBasicBlock *MBB, MachineInstr &Root,
- SmallVector<MachineCombinerPattern, 16> &Patterns) {
+void MachineCombiner::verifyPatternOrder(MachineBasicBlock *MBB,
+ MachineInstr &Root,
+ SmallVector<unsigned, 16> &Patterns) {
long PrevLatencyDiff = std::numeric_limits<long>::max();
(void)PrevLatencyDiff; // Variable is used in assert only.
for (auto P : Patterns) {
@@ -590,7 +570,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
- SmallVector<MachineCombinerPattern, 16> Patterns;
+ SmallVector<unsigned, 16> Patterns;
// The motivating example is:
//
// MUL Other MUL_op1 MUL_op2 Other
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 9fbd516acea8e1..7d77e5d1a1ff05 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -919,7 +919,7 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
// instruction is known to not increase the critical path, then don't match
// that pattern.
bool TargetInstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const {
bool Commute;
if (isReassociationCandidate(Root, Commute)) {
@@ -941,13 +941,17 @@ bool TargetInstrInfo::getMachineCombinerPatterns(
}
/// Return true when a code sequence can improve loop throughput.
-bool
-TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+bool TargetInstrInfo::isThroughputPattern(unsigned Pattern) const {
return false;
}
+CombinerObjective
+TargetInstrInfo::getCombinerObjective(unsigned Pattern) const {
+ return CombinerObjective::Default;
+}
+
std::pair<unsigned, unsigned>
-TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern,
+TargetInstrInfo::getReassociationOpcodes(unsigned Pattern,
const MachineInstr &Root,
const MachineInstr &Prev) const {
bool AssocCommutRoot = isAssociativeAndCommutative(Root);
@@ -1036,7 +1040,7 @@ TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern,
// Return a pair of boolean flags showing if the new root and new prev operands
// must be swapped. See visual example of the rule in
// TargetInstrInfo::getReassociationOpcodes.
-static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) {
+static std::pair<bool, bool> mustSwapOperands(unsigned Pattern) {
switch (Pattern) {
default:
llvm_unreachable("Unexpected pattern");
@@ -1054,8 +1058,7 @@ static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) {
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
- MachineInstr &Root, MachineInstr &Prev,
- MachineCombinerPattern Pattern,
+ MachineInstr &Root, MachineInstr &Prev, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
@@ -1177,7 +1180,7 @@ void TargetInstrInfo::reassociateOps(
}
void TargetInstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 9783b33219460c..92647cb405252f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6043,7 +6043,7 @@ bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ SmallVectorImpl<unsigned> &Patterns) {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -6064,21 +6064,21 @@ static bool getMaddPatterns(MachineInstr &Root,
}
auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
- MachineCombinerPattern Pattern) {
+ unsigned Pattern) {
if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
Patterns.push_back(Pattern);
Found = true;
}
};
- auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
+ auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
Patterns.push_back(Pattern);
Found = true;
}
};
- typedef MachineCombinerPattern MCP;
+ typedef AArch64MachineCombinerPattern MCP;
switch (Opc) {
default:
@@ -6184,7 +6184,7 @@ static bool getMaddPatterns(MachineInstr &Root,
/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ SmallVectorImpl<unsigned> &Patterns) {
if (!isCombineInstrCandidateFP(Root))
return false;
@@ -6192,8 +6192,7 @@ static bool getFMAPatterns(MachineInstr &Root,
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
- auto Match = [&](int Opcode, int Operand,
- MachineCombinerPattern Pattern) -> bool {
+ auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
Patterns.push_back(Pattern);
return true;
@@ -6201,7 +6200,7 @@ static bool getFMAPatterns(MachineInstr &Root,
return false;
};
- typedef MachineCombinerPattern MCP;
+ typedef AArch64MachineCombinerPattern MCP;
switch (Root.getOpcode()) {
default:
@@ -6327,12 +6326,11 @@ static bool getFMAPatterns(MachineInstr &Root,
}
static bool getFMULPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ SmallVectorImpl<unsigned> &Patterns) {
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
- auto Match = [&](unsigned Opcode, int Operand,
- MachineCombinerPattern Pattern) -> bool {
+ auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineOperand &MO = Root.getOperand(Operand);
MachineInstr *MI = nullptr;
@@ -6349,7 +6347,7 @@ static bool getFMULPatterns(MachineInstr &Root,
return false;
};
- typedef MachineCombinerPattern MCP;
+ typedef AArch64MachineCombinerPattern MCP;
switch (Root.getOpcode()) {
default:
@@ -6380,12 +6378,12 @@ static bool getFMULPatterns(MachineInstr &Root,
}
static bool getFNEGPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ SmallVectorImpl<unsigned> &Patterns) {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- auto Match = [&](unsigned Opcode, MachineCombinerPattern Pattern) -> bool {
+ auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
MachineOperand &MO = Root.getOperand(1);
MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
if (MI != nullptr && (MI->getOpcode() == Opcode) &&
@@ -6404,9 +6402,9 @@ static bool getFNEGPatterns(MachineInstr &Root,
default:
break;
case AArch64::FNEGDr:
- return Match(AArch64::FMADDDrrr, MachineCombinerPattern::FNMADD);
+ return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
case AArch64::FNEGSr:
- return Match(AArch64::FMADDSrrr, MachineCombinerPattern::FNMADD);
+ return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
}
return false;
@@ -6415,116 +6413,115 @@ static bool getFNEGPatterns(MachineInstr &Root,
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
-bool AArch64InstrInfo::isThroughputPattern(
- MachineCombinerPattern Pattern) const {
+bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
switch (Pattern) {
default:
break;
- case MachineCombinerPattern::FMULADDH_OP1:
- case MachineCombinerPattern::FMULADDH_OP2:
- case MachineCombinerPattern::FMULSUBH_OP1:
- case MachineCombinerPattern::FMULSUBH_OP2:
- case MachineCombinerPattern::FMULADDS_OP1:
- case MachineCombinerPattern::FMULADDS_OP2:
- case MachineCombinerPattern::FMULSUBS_OP1:
- case MachineCombinerPattern::FMULSUBS_OP2:
- case MachineCombinerPattern::FMULADDD_OP1:
- case MachineCombinerPattern::FMULADDD_OP2:
- case MachineCombinerPattern::FMULSUBD_OP1:
- case MachineCombinerPattern::FMULSUBD_OP2:
- case MachineCombinerPattern::FNMULSUBH_OP1:
- case MachineCombinerPattern::FNMULSUBS_OP1:
- case MachineCombinerPattern::FNMULSUBD_OP1:
- case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
- case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
- case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
- case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
- case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
- case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
- case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
- case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
- case MachineCombinerPattern::FMLAv4f16_OP2:
- case MachineCombinerPattern::FMLAv4f16_OP1:
- case MachineCombinerPattern::FMLAv8f16_OP1:
- case MachineCombinerPattern::FMLAv8f16_OP2:
- case MachineCombinerPattern::FMLAv2f32_OP2:
- case MachineCombinerPattern::FMLAv2f32_OP1:
- case MachineCombinerPattern::FMLAv2f64_OP1:
- case MachineCombinerPattern::FMLAv2f64_OP2:
- case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
- case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
- case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
- case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
- case MachineCombinerPattern::FMLAv4f32_OP1:
- case MachineCombinerPattern::FMLAv4f32_OP2:
- case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
- case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
- case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
- case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
- case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
- case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
- case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
- case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
- case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
- case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
- case MachineCombinerPattern::FMLSv4f16_OP1:
- case MachineCombinerPattern::FMLSv4f16_OP2:
- case MachineCombinerPattern::FMLSv8f16_OP1:
- case MachineCombinerPattern::FMLSv8f16_OP2:
- case MachineCombinerPattern::FMLSv2f32_OP2:
- case MachineCombinerPattern::FMLSv2f64_OP2:
- case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
- case MachineCombinerPattern::FMLSv4f32_OP2:
- case MachineCombinerPattern::FMULv2i32_indexed_OP1:
- case MachineCombinerPattern::FMULv2i32_indexed_OP2:
- case MachineCombinerPattern::FMULv2i64_indexed_OP1:
- case MachineCombinerPattern::FMULv2i64_indexed_OP2:
- case MachineCombinerPattern::FMULv4i16_indexed_OP1:
- case MachineCombinerPattern::FMULv4i16_indexed_OP2:
- case MachineCombinerPattern::FMULv4i32_indexed_OP1:
- case MachineCombinerPattern::FMULv4i32_indexed_OP2:
- case MachineCombinerPattern::FMULv8i16_indexed_OP1:
- case MachineCombinerPattern::FMULv8i16_indexed_OP2:
- case MachineCombinerPattern::MULADDv8i8_OP1:
- case MachineCombinerPattern::MULADDv8i8_OP2:
- case MachineCombinerPattern::MULADDv16i8_OP1:
- case MachineCombinerPattern::MULADDv16i8_OP2:
- case MachineCombinerPattern::MULADDv4i16_OP1:
- case MachineCombinerPattern::MULADDv4i16_OP2:
- case MachineCombinerPattern::MULADDv8i16_OP1:
- case MachineCombinerPattern::MULADDv8i16_OP2:
- case MachineCombinerPattern::MULADDv2i32_OP1:
- case MachineCombinerPattern::MULADDv2i32_OP2:
- case MachineCombinerPattern::MULADDv4i32_OP1:
- case MachineCombinerPattern::MULADDv4i32_OP2:
- case MachineCombinerPattern::MULSUBv8i8_OP1:
- case MachineCombinerPattern::MULSUBv8i8_OP2:
- case MachineCombinerPattern::MULSUBv16i8_OP1:
- case MachineCombinerPattern::MULSUBv16i8_OP2:
- case MachineCombinerPattern::MULSUBv4i16_OP1:
- case MachineCombinerPattern::MULSUBv4i16_OP2:
- case MachineCombinerPattern::MULSUBv8i16_OP1:
- case MachineCombinerPattern::MULSUBv8i16_OP2:
- case MachineCombinerPattern::MULSUBv2i32_OP1:
- case MachineCombinerPattern::MULSUBv2i32_OP2:
- case MachineCombinerPattern::MULSUBv4i32_OP1:
- case MachineCombinerPattern::MULSUBv4i32_OP2:
- case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
- case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
- case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
- case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
- case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
- case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
- case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
- case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
- case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
- case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
- case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
- case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
- case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
- case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
- case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
- case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMULADDH_OP1:
+ case AArch64MachineCombinerPattern::FMULADDH_OP2:
+ case AArch64MachineCombinerPattern::FMULSUBH_OP1:
+ case AArch64MachineCombinerPattern::FMULSUBH_OP2:
+ case AArch64MachineCombinerPattern::FMULADDS_OP1:
+ case AArch64MachineCombinerPattern::FMULADDS_OP2:
+ case AArch64MachineCombinerPattern::FMULSUBS_OP1:
+ case AArch64MachineCombinerPattern::FMULSUBS_OP2:
+ case AArch64MachineCombinerPattern::FMULADDD_OP1:
+ case AArch64MachineCombinerPattern::FMULADDD_OP2:
+ case AArch64MachineCombinerPattern::FMULSUBD_OP1:
+ case AArch64MachineCombinerPattern::FMULSUBD_OP2:
+ case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
+ case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
+ case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
+ case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
+ case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
+ case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
+ case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
+ case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
+ case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
+ case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
+ case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
+ case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
+ case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
+ case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
+ case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
+ case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
+ case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
+ case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
+ case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
+ case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
+ case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
+ case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
+ case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
+ case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
+ case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
+ case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
+ case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
+ case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
+ case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
+ case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
+ case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
+ case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
+ case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
return true;
} // end switch (Pattern)
return false;
@@ -6532,8 +6529,7 @@ bool AArch64InstrInfo::isThroughputPattern(
/// Find other MI combine patterns.
static bool getMiscPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns)
-{
+ SmallVectorImpl<unsigned> &Patterns) {
// A - (B + C) ==> (A - B) - C or (A - C) - B
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
@@ -6557,21 +6553,32 @@ static bool getMiscPatterns(MachineInstr &Root,
canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
- Patterns.push_back(MachineCombinerPattern::SUBADD_OP1);
- Patterns.push_back(MachineCombinerPattern::SUBADD_OP2);
+ Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
+ Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
return true;
}
return false;
}
+CombinerObjective
+AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
+ switch (Pattern) {
+ case AArch64MachineCombinerPattern::SUBADD_OP1:
+ case AArch64MachineCombinerPattern::SUBADD_OP2:
+ return CombinerObjective::MustReduceDepth;
+ default:
+ return TargetInstrInfo::getCombinerObjective(Pattern);
+ }
+}
+
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const {
// Integer patterns
if (getMaddPatterns(Root, Patterns))
@@ -6930,7 +6937,7 @@ genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
@@ -6948,25 +6955,25 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
DelInstrs, InstrIdxForVirtReg);
return;
- case MachineCombinerPattern::SUBADD_OP1:
+ case AArch64MachineCombinerPattern::SUBADD_OP1:
// A - (B + C)
// ==> (A - B) - C
genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
InstrIdxForVirtReg);
break;
- case MachineCombinerPattern::SUBADD_OP2:
+ case AArch64MachineCombinerPattern::SUBADD_OP2:
// A - (B + C)
// ==> (A - C) - B
genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
InstrIdxForVirtReg);
break;
- case MachineCombinerPattern::MULADDW_OP1:
- case MachineCombinerPattern::MULADDX_OP1:
+ case AArch64MachineCombinerPattern::MULADDW_OP1:
+ case AArch64MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0
// ADD R,I,C
// ==> MADD R,A,B,C
// --- Create(MADD);
- if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -6975,13 +6982,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDW_OP2:
- case MachineCombinerPattern::MULADDX_OP2:
+ case AArch64MachineCombinerPattern::MULADDW_OP2:
+ case AArch64MachineCombinerPattern::MULADDX_OP2:
// MUL I=A,B,0
// ADD R,C,I
// ==> MADD R,A,B,C
// --- Create(MADD);
- if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
+ if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -6990,8 +6997,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDWI_OP1:
- case MachineCombinerPattern::MULADDXI_OP1: {
+ case AArch64MachineCombinerPattern::MULADDWI_OP1:
+ case AArch64MachineCombinerPattern::MULADDXI_OP1: {
// MUL I=A,B,0
// ADD R,I,Imm
// ==> MOV V, Imm
@@ -6999,7 +7006,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
@@ -7052,8 +7059,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
- case MachineCombinerPattern::MULSUBW_OP1:
- case MachineCombinerPattern::MULSUBX_OP1: {
+ case AArch64MachineCombinerPattern::MULSUBW_OP1:
+ case AArch64MachineCombinerPattern::MULSUBX_OP1: {
// MUL I=A,B,0
// SUB R,I, C
// ==> SUB V, 0, C
@@ -7061,7 +7068,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *SubRC;
unsigned SubOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
SubOpc = AArch64::SUBWrr;
SubRC = &AArch64::GPR32spRegClass;
ZeroReg = AArch64::WZR;
@@ -7085,13 +7092,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
- case MachineCombinerPattern::MULSUBW_OP2:
- case MachineCombinerPattern::MULSUBX_OP2:
+ case AArch64MachineCombinerPattern::MULSUBW_OP2:
+ case AArch64MachineCombinerPattern::MULSUBX_OP2:
// MUL I=A,B,0
// SUB R,C,I
// ==> MSUB R,A,B,C (computes C - A*B)
// --- Create(MSUB);
- if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
+ if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
Opc = AArch64::MSUBWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -7100,8 +7107,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBWI_OP1:
- case MachineCombinerPattern::MULSUBXI_OP1: {
+ case AArch64MachineCombinerPattern::MULSUBWI_OP1:
+ case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// SUB R,I, Imm
// ==> MOV V, -Imm
@@ -7109,7 +7116,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
@@ -7162,318 +7169,318 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
break;
}
- case MachineCombinerPattern::MULADDv8i8_OP1:
+ case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv8i8_OP2:
+ case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv16i8_OP1:
+ case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv16i8_OP2:
+ case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv4i16_OP1:
+ case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv4i16_OP2:
+ case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv8i16_OP1:
+ case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv8i16_OP2:
+ case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv2i32_OP1:
+ case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv2i32_OP2:
+ case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv4i32_OP1:
+ case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv4i32_OP2:
+ case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv8i8_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
RC);
break;
- case MachineCombinerPattern::MULSUBv8i8_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
Opc = AArch64::MLSv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv16i8_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
RC);
break;
- case MachineCombinerPattern::MULSUBv16i8_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
Opc = AArch64::MLSv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv4i16_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
RC);
break;
- case MachineCombinerPattern::MULSUBv4i16_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
Opc = AArch64::MLSv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv8i16_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
RC);
break;
- case MachineCombinerPattern::MULSUBv8i16_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
Opc = AArch64::MLSv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv2i32_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
RC);
break;
- case MachineCombinerPattern::MULSUBv2i32_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
Opc = AArch64::MLSv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv4i32_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
RC);
break;
- case MachineCombinerPattern::MULSUBv4i32_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
Opc = AArch64::MLSv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
RC);
break;
- case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
Opc = AArch64::MLSv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
RC);
break;
- case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
Opc = AArch64::MLSv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
RC);
break;
- case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
Opc = AArch64::MLSv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
RC);
break;
- case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
Opc = AArch64::MLSv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
// Floating Point Support
- case MachineCombinerPattern::FMULADDH_OP1:
+ case AArch64MachineCombinerPattern::FMULADDH_OP1:
Opc = AArch64::FMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FMULADDS_OP1:
+ case AArch64MachineCombinerPattern::FMULADDS_OP1:
Opc = AArch64::FMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FMULADDD_OP1:
+ case AArch64MachineCombinerPattern::FMULADDD_OP1:
Opc = AArch64::FMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FMULADDH_OP2:
+ case AArch64MachineCombinerPattern::FMULADDH_OP2:
Opc = AArch64::FMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::FMULADDS_OP2:
+ case AArch64MachineCombinerPattern::FMULADDS_OP2:
Opc = AArch64::FMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::FMULADDD_OP2:
+ case AArch64MachineCombinerPattern::FMULADDD_OP2:
Opc = AArch64::FMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
Opc = AArch64::FMLAv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
Opc = AArch64::FMLAv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
Opc = AArch64::FMLAv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
Opc = AArch64::FMLAv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLAv4f16_OP1:
+ case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
break;
- case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLAv4f16_OP2:
+ case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
- case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
- case MachineCombinerPattern::FMLAv2f32_OP1:
+ case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
RC = &AArch64::FPR64RegClass;
- if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
@@ -7483,10 +7490,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
FMAInstKind::Accumulator);
}
break;
- case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
- case MachineCombinerPattern::FMLAv2f32_OP2:
+ case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
RC = &AArch64::FPR64RegClass;
- if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
@@ -7497,35 +7504,35 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
- case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLAv8f16_OP1:
+ case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
break;
- case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLAv8f16_OP2:
+ case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
- case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
- case MachineCombinerPattern::FMLAv2f64_OP1:
+ case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
RC = &AArch64::FPR128RegClass;
- if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
@@ -7535,10 +7542,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
FMAInstKind::Accumulator);
}
break;
- case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
- case MachineCombinerPattern::FMLAv2f64_OP2:
+ case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
RC = &AArch64::FPR128RegClass;
- if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
@@ -7549,10 +7556,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
- case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
- case MachineCombinerPattern::FMLAv4f32_OP1:
+ case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
RC = &AArch64::FPR128RegClass;
- if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
@@ -7563,10 +7570,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
- case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
- case MachineCombinerPattern::FMLAv4f32_OP2:
+ case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
RC = &AArch64::FPR128RegClass;
- if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
@@ -7577,70 +7584,70 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
- case MachineCombinerPattern::FMULSUBH_OP1:
+ case AArch64MachineCombinerPattern::FMULSUBH_OP1:
Opc = AArch64::FNMSUBHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FMULSUBS_OP1:
+ case AArch64MachineCombinerPattern::FMULSUBS_OP1:
Opc = AArch64::FNMSUBSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FMULSUBD_OP1:
+ case AArch64MachineCombinerPattern::FMULSUBD_OP1:
Opc = AArch64::FNMSUBDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FNMULSUBH_OP1:
+ case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
Opc = AArch64::FNMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FNMULSUBS_OP1:
+ case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
Opc = AArch64::FNMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FNMULSUBD_OP1:
+ case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
Opc = AArch64::FNMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::FMULSUBH_OP2:
+ case AArch64MachineCombinerPattern::FMULSUBH_OP2:
Opc = AArch64::FMSUBHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::FMULSUBS_OP2:
+ case AArch64MachineCombinerPattern::FMULSUBS_OP2:
Opc = AArch64::FMSUBSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::FMULSUBD_OP2:
+ case AArch64MachineCombinerPattern::FMULSUBD_OP2:
Opc = AArch64::FMSUBDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
Opc = AArch64::FMLSv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
Opc = AArch64::FMLSv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLSv4f16_OP1:
- case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
+ case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
+ case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
RC = &AArch64::FPR64RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
@@ -7648,7 +7655,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLSv4f16_OP1) {
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
@@ -7659,23 +7666,23 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
- case MachineCombinerPattern::FMLSv4f16_OP2:
+ case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLSv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
- case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLSv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLSv2f32_OP2:
- case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
+ case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
RC = &AArch64::FPR64RegClass;
- if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
Opc = AArch64::FMLSv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
@@ -7686,8 +7693,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
- case MachineCombinerPattern::FMLSv8f16_OP1:
- case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
+ case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
+ case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
@@ -7695,7 +7702,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLSv8f16_OP1) {
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
@@ -7706,23 +7713,23 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
- case MachineCombinerPattern::FMLSv8f16_OP2:
+ case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLSv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
- case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLSv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
- case MachineCombinerPattern::FMLSv2f64_OP2:
- case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
+ case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
RC = &AArch64::FPR128RegClass;
- if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
Opc = AArch64::FMLSv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
@@ -7733,10 +7740,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
- case MachineCombinerPattern::FMLSv4f32_OP2:
- case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
+ case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
RC = &AArch64::FPR128RegClass;
- if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
Opc = AArch64::FMLSv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
@@ -7746,8 +7753,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
FMAInstKind::Accumulator);
}
break;
- case MachineCombinerPattern::FMLSv2f32_OP1:
- case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
+ case AArch64MachineCombinerPattern::FMLSv2f32_OP1:
+ case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
RC = &AArch64::FPR64RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
@@ -7755,7 +7762,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
@@ -7766,8 +7773,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
- case MachineCombinerPattern::FMLSv4f32_OP1:
- case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
+ case AArch64MachineCombinerPattern::FMLSv4f32_OP1:
+ case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
@@ -7775,7 +7782,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
@@ -7786,8 +7793,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
- case MachineCombinerPattern::FMLSv2f64_OP1:
- case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
+ case AArch64MachineCombinerPattern::FMLSv2f64_OP1:
+ case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
@@ -7795,7 +7802,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
+ if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
@@ -7806,47 +7813,52 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
- case MachineCombinerPattern::FMULv2i32_indexed_OP1:
- case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
+ case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: {
unsigned IdxDupOp =
- (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
+ (Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1
+ : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
- case MachineCombinerPattern::FMULv2i64_indexed_OP1:
- case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
+ case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: {
unsigned IdxDupOp =
- (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
+ (Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1
+ : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
- case MachineCombinerPattern::FMULv4i16_indexed_OP1:
- case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
+ case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: {
unsigned IdxDupOp =
- (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
+ (Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1
+ : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
&AArch64::FPR128_loRegClass, MRI);
break;
}
- case MachineCombinerPattern::FMULv4i32_indexed_OP1:
- case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
+ case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: {
unsigned IdxDupOp =
- (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
+ (Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1
+ : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
- case MachineCombinerPattern::FMULv8i16_indexed_OP1:
- case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
+ case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: {
unsigned IdxDupOp =
- (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
+ (Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1
+ : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
&AArch64::FPR128_loRegClass, MRI);
break;
}
- case MachineCombinerPattern::FNMADD: {
+ case AArch64MachineCombinerPattern::FNMADD: {
MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
break;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 2f10f80f4bdf70..9a2914891675c5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -33,6 +33,146 @@ static const MachineMemOperand::Flags MOStridedAccess =
#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
+// AArch64 MachineCombiner patterns
+enum AArch64MachineCombinerPattern : unsigned {
+ // These are patterns used to reduce the length of dependence chain.
+ SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
+ SUBADD_OP2,
+
+ // These are multiply-add patterns matched by the AArch64 machine combiner.
+ MULADDW_OP1,
+ MULADDW_OP2,
+ MULSUBW_OP1,
+ MULSUBW_OP2,
+ MULADDWI_OP1,
+ MULSUBWI_OP1,
+ MULADDX_OP1,
+ MULADDX_OP2,
+ MULSUBX_OP1,
+ MULSUBX_OP2,
+ MULADDXI_OP1,
+ MULSUBXI_OP1,
+ // NEON integers vectors
+ MULADDv8i8_OP1,
+ MULADDv8i8_OP2,
+ MULADDv16i8_OP1,
+ MULADDv16i8_OP2,
+ MULADDv4i16_OP1,
+ MULADDv4i16_OP2,
+ MULADDv8i16_OP1,
+ MULADDv8i16_OP2,
+ MULADDv2i32_OP1,
+ MULADDv2i32_OP2,
+ MULADDv4i32_OP1,
+ MULADDv4i32_OP2,
+
+ MULSUBv8i8_OP1,
+ MULSUBv8i8_OP2,
+ MULSUBv16i8_OP1,
+ MULSUBv16i8_OP2,
+ MULSUBv4i16_OP1,
+ MULSUBv4i16_OP2,
+ MULSUBv8i16_OP1,
+ MULSUBv8i16_OP2,
+ MULSUBv2i32_OP1,
+ MULSUBv2i32_OP2,
+ MULSUBv4i32_OP1,
+ MULSUBv4i32_OP2,
+
+ MULADDv4i16_indexed_OP1,
+ MULADDv4i16_indexed_OP2,
+ MULADDv8i16_indexed_OP1,
+ MULADDv8i16_indexed_OP2,
+ MULADDv2i32_indexed_OP1,
+ MULADDv2i32_indexed_OP2,
+ MULADDv4i32_indexed_OP1,
+ MULADDv4i32_indexed_OP2,
+
+ MULSUBv4i16_indexed_OP1,
+ MULSUBv4i16_indexed_OP2,
+ MULSUBv8i16_indexed_OP1,
+ MULSUBv8i16_indexed_OP2,
+ MULSUBv2i32_indexed_OP1,
+ MULSUBv2i32_indexed_OP2,
+ MULSUBv4i32_indexed_OP1,
+ MULSUBv4i32_indexed_OP2,
+
+ // Floating Point
+ FMULADDH_OP1,
+ FMULADDH_OP2,
+ FMULSUBH_OP1,
+ FMULSUBH_OP2,
+ FMULADDS_OP1,
+ FMULADDS_OP2,
+ FMULSUBS_OP1,
+ FMULSUBS_OP2,
+ FMULADDD_OP1,
+ FMULADDD_OP2,
+ FMULSUBD_OP1,
+ FMULSUBD_OP2,
+ FNMULSUBH_OP1,
+ FNMULSUBS_OP1,
+ FNMULSUBD_OP1,
+ FMLAv1i32_indexed_OP1,
+ FMLAv1i32_indexed_OP2,
+ FMLAv1i64_indexed_OP1,
+ FMLAv1i64_indexed_OP2,
+ FMLAv4f16_OP1,
+ FMLAv4f16_OP2,
+ FMLAv8f16_OP1,
+ FMLAv8f16_OP2,
+ FMLAv2f32_OP2,
+ FMLAv2f32_OP1,
+ FMLAv2f64_OP1,
+ FMLAv2f64_OP2,
+ FMLAv4i16_indexed_OP1,
+ FMLAv4i16_indexed_OP2,
+ FMLAv8i16_indexed_OP1,
+ FMLAv8i16_indexed_OP2,
+ FMLAv2i32_indexed_OP1,
+ FMLAv2i32_indexed_OP2,
+ FMLAv2i64_indexed_OP1,
+ FMLAv2i64_indexed_OP2,
+ FMLAv4f32_OP1,
+ FMLAv4f32_OP2,
+ FMLAv4i32_indexed_OP1,
+ FMLAv4i32_indexed_OP2,
+ FMLSv1i32_indexed_OP2,
+ FMLSv1i64_indexed_OP2,
+ FMLSv4f16_OP1,
+ FMLSv4f16_OP2,
+ FMLSv8f16_OP1,
+ FMLSv8f16_OP2,
+ FMLSv2f32_OP1,
+ FMLSv2f32_OP2,
+ FMLSv2f64_OP1,
+ FMLSv2f64_OP2,
+ FMLSv4i16_indexed_OP1,
+ FMLSv4i16_indexed_OP2,
+ FMLSv8i16_indexed_OP1,
+ FMLSv8i16_indexed_OP2,
+ FMLSv2i32_indexed_OP1,
+ FMLSv2i32_indexed_OP2,
+ FMLSv2i64_indexed_OP1,
+ FMLSv2i64_indexed_OP2,
+ FMLSv4f32_OP1,
+ FMLSv4f32_OP2,
+ FMLSv4i32_indexed_OP1,
+ FMLSv4i32_indexed_OP2,
+
+ FMULv2i32_indexed_OP1,
+ FMULv2i32_indexed_OP2,
+ FMULv2i64_indexed_OP1,
+ FMULv2i64_indexed_OP2,
+ FMULv4i16_indexed_OP1,
+ FMULv4i16_indexed_OP2,
+ FMULv4i32_indexed_OP1,
+ FMULv4i32_indexed_OP2,
+ FMULv8i16_indexed_OP1,
+ FMULv8i16_indexed_OP2,
+
+ FNMADD,
+};
class AArch64InstrInfo final : public AArch64GenInstrInfo {
const AArch64RegisterInfo RI;
const AArch64Subtarget &Subtarget;
@@ -283,17 +423,17 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
const MachineRegisterInfo *MRI) const override;
bool optimizeCondBranch(MachineInstr &MI) const override;
+ CombinerObjective getCombinerObjective(unsigned Pattern) const override;
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
- bool isThroughputPattern(MachineCombinerPattern Pattern) const override;
+ bool isThroughputPattern(unsigned Pattern) const override;
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in ``Root``. All potential patterns are
/// listed in the ``Patterns`` array.
- bool
- getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns,
- bool DoRegPressureReduce) const override;
+ bool getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns,
+ bool DoRegPressureReduce) const override;
/// Return true when Inst is associative and commutative so that it can be
/// reassociated. If Invert is true, then the inverse of Inst operation must
/// be checked.
@@ -302,7 +442,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
/// When getMachineCombinerPatterns() finds patterns, this function generates
/// the instructions that could replace the original code sequence
void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
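(Aside, not part of the commit: each backend's pattern enum now chains off the generic values, so a plain unsigned Pattern can carry either a generic or a target-specific value. A minimal sketch of that scheme for a hypothetical out-of-tree target; the target and pattern names below are illustrative only.)

#include "llvm/CodeGen/MachineCombinerPattern.h"

namespace llvm {
// Hypothetical target: its first pattern value starts where the generic
// REASSOC_* values end, so generic and target pattern values never collide.
enum MyTargetMachineCombinerPattern : unsigned {
  FUSED_MULADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
  FUSED_MULADD_OP2,
};
} // namespace llvm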
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 5f5eb31a5a85fa..93874d65531aed 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -348,9 +348,9 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
// register with D. After the transformation, A and D must be assigned with
// same hardware register due to TIE attribute of FMA instructions.
//
-bool PPCInstrInfo::getFMAPatterns(
- MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
- bool DoRegPressureReduce) const {
+bool PPCInstrInfo::getFMAPatterns(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns,
+ bool DoRegPressureReduce) const {
MachineBasicBlock *MBB = Root.getParent();
const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -476,7 +476,7 @@ bool PPCInstrInfo::getFMAPatterns(
if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
- Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA);
+ Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_BCA);
return true;
}
@@ -484,7 +484,7 @@ bool PPCInstrInfo::getFMAPatterns(
if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
- Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC);
+ Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_BAC);
return true;
}
}
@@ -511,12 +511,12 @@ bool PPCInstrInfo::getFMAPatterns(
MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
AddOpIdx = -1;
if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
- Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+ Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM);
LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
return true;
}
if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
- Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
+ Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM);
LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
return true;
}
@@ -524,7 +524,7 @@ bool PPCInstrInfo::getFMAPatterns(
}
void PPCInstrInfo::finalizeInsInstrs(
- MachineInstr &Root, MachineCombinerPattern &P,
+ MachineInstr &Root, unsigned &Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs) const {
assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
@@ -542,12 +542,12 @@ void PPCInstrInfo::finalizeInsInstrs(
// For now we only need to fix up placeholder for register pressure reduce
// patterns.
Register ConstReg = 0;
- switch (P) {
- case MachineCombinerPattern::REASSOC_XY_BCA:
+ switch (Pattern) {
+ case PPCMachineCombinerPattern::REASSOC_XY_BCA:
ConstReg =
TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
break;
- case MachineCombinerPattern::REASSOC_XY_BAC:
+ case PPCMachineCombinerPattern::REASSOC_XY_BAC:
ConstReg =
TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
break;
@@ -737,8 +737,21 @@ PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {
return nullptr;
}
+CombinerObjective PPCInstrInfo::getCombinerObjective(unsigned Pattern) const {
+ switch (Pattern) {
+ case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ return CombinerObjective::MustReduceDepth;
+ case PPCMachineCombinerPattern::REASSOC_XY_BCA:
+ case PPCMachineCombinerPattern::REASSOC_XY_BAC:
+ return CombinerObjective::MustReduceRegisterPressure;
+ default:
+ return TargetInstrInfo::getCombinerObjective(Pattern);
+ }
+}
+
bool PPCInstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const {
// Using the machine combiner in this way is potentially expensive, so
// restrict to when aggressive optimizations are desired.
@@ -753,15 +766,15 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
}
void PPCInstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
switch (Pattern) {
- case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
- case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
- case MachineCombinerPattern::REASSOC_XY_BCA:
- case MachineCombinerPattern::REASSOC_XY_BAC:
+ case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ case PPCMachineCombinerPattern::REASSOC_XY_BCA:
+ case PPCMachineCombinerPattern::REASSOC_XY_BAC:
reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
break;
default:
@@ -773,7 +786,7 @@ void PPCInstrInfo::genAlternativeCodeSequence(
}
void PPCInstrInfo::reassociateFMA(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
@@ -790,8 +803,8 @@ void PPCInstrInfo::reassociateFMA(
assert(Idx >= 0 && "Root must be a FMA instruction");
bool IsILPReassociate =
- (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) ||
- (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+ (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) ||
+ (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM);
uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
@@ -801,18 +814,18 @@ void PPCInstrInfo::reassociateFMA(
switch (Pattern) {
default:
llvm_unreachable("not recognized pattern!");
- case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
- case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
break;
- case MachineCombinerPattern::REASSOC_XY_BAC: {
+ case PPCMachineCombinerPattern::REASSOC_XY_BAC: {
Register MULReg =
TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
Leaf = MRI.getVRegDef(MULReg);
break;
}
- case MachineCombinerPattern::REASSOC_XY_BCA: {
+ case PPCMachineCombinerPattern::REASSOC_XY_BCA: {
Register MULReg = TRI->lookThruCopyLike(
Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
Leaf = MRI.getVRegDef(MULReg);
@@ -853,10 +866,10 @@ void PPCInstrInfo::reassociateFMA(
if (IsILPReassociate)
GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
- if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
- } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ } else if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) {
GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
} else {
@@ -881,7 +894,7 @@ void PPCInstrInfo::reassociateFMA(
}
Register NewVRD = 0;
- if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
NewVRD = MRI.createVirtualRegister(RC);
InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
}
@@ -901,7 +914,7 @@ void PPCInstrInfo::reassociateFMA(
switch (Pattern) {
default:
llvm_unreachable("not recognized pattern!");
- case MachineCombinerPattern::REASSOC_XY_AMM_BMM: {
+ case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM: {
// Create new instructions for insertion.
MachineInstrBuilder MINewB =
BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
@@ -936,7 +949,7 @@ void PPCInstrInfo::reassociateFMA(
InsInstrs.push_back(MINewC);
break;
}
- case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: {
+ case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM: {
assert(NewVRD && "new FMA register not created!");
// Create new instructions for insertion.
MachineInstrBuilder MINewA =
@@ -980,11 +993,11 @@ void PPCInstrInfo::reassociateFMA(
InsInstrs.push_back(MINewC);
break;
}
- case MachineCombinerPattern::REASSOC_XY_BAC:
- case MachineCombinerPattern::REASSOC_XY_BCA: {
+ case PPCMachineCombinerPattern::REASSOC_XY_BAC:
+ case PPCMachineCombinerPattern::REASSOC_XY_BCA: {
Register VarReg;
bool KillVarReg = false;
- if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) {
+ if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_BCA) {
VarReg = RegM31;
KillVarReg = KillM31;
} else {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 045932dc0d3ba1..1e2687f92c61e5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -85,6 +85,19 @@ enum SpillOpcodeKey {
SOK_LastOpcodeSpill // This must be last on the enum.
};
+// PPC MachineCombiner patterns
+enum PPCMachineCombinerPattern : unsigned {
+ // These are patterns matched by the PowerPC to reassociate FMA chains.
+ REASSOC_XY_AMM_BMM = MachineCombinerPattern::TARGET_PATTERN_START,
+ REASSOC_XMM_AMM_BMM,
+
+ // These are patterns matched by the PowerPC to reassociate FMA and FSUB to
+ // reduce register pressure.
+ REASSOC_XY_BCA,
+ REASSOC_XY_BAC,
+
+};
+
// Define list of load and store spill opcodes.
#define NoInstr PPC::INSTRUCTION_LIST_END
#define Pwr8LoadOpcodes \
@@ -224,7 +237,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
ArrayRef<unsigned> getLoadOpcodesForSpillArray() const;
unsigned getSpillIndex(const TargetRegisterClass *RC) const;
int16_t getFMAOpIdxInfo(unsigned Opcode) const;
- void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern,
+ void reassociateFMA(MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
@@ -350,7 +363,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
/// When getMachineCombinerPatterns() finds patterns, this function generates
/// the instructions that could replace the original code sequence
void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
@@ -358,15 +371,16 @@ class PPCInstrInfo : public PPCGenInstrInfo {
/// Return true when there is potentially a faster code sequence for a fma
/// chain ending in \p Root. All potential patterns are output in the \p
/// P array.
- bool getFMAPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &P,
+ bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const;
+ CombinerObjective getCombinerObjective(unsigned Pattern) const override;
+
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in <Root>. All potential patterns are
/// output in the <Pattern> array.
bool getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &P,
+ SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const override;
/// On PowerPC, we leverage machine combiner pass to reduce register pressure
@@ -380,7 +394,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
/// Fixup the placeholders we put in genAlternativeCodeSequence() for
/// MachineCombiner.
void
- finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
+ finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs) const override;
bool isAssociativeAndCommutative(const MachineInstr &Inst,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 84d754e3cbcf30..d78f5bd9dedf3d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1560,7 +1560,7 @@ MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
}
void RISCVInstrInfo::finalizeInsInstrs(
- MachineInstr &Root, MachineCombinerPattern &P,
+ MachineInstr &Root, unsigned &Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs) const {
int16_t FrmOpIdx =
RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
@@ -1748,10 +1748,9 @@ static bool canCombineFPFusedMultiply(const MachineInstr &Root,
return RISCV::hasEqualFRM(Root, *MI);
}
-static bool
-getFPFusedMultiplyPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns,
- bool DoRegPressureReduce) {
+static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns,
+ bool DoRegPressureReduce) {
unsigned Opc = Root.getOpcode();
bool IsFAdd = isFADD(Opc);
if (!IsFAdd && !isFSUB(Opc))
@@ -1759,21 +1758,21 @@ getFPFusedMultiplyPatterns(MachineInstr &Root,
bool Added = false;
if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
DoRegPressureReduce)) {
- Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX
- : MachineCombinerPattern::FMSUB);
+ Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
+ : RISCVMachineCombinerPattern::FMSUB);
Added = true;
}
if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
DoRegPressureReduce)) {
- Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_XA
- : MachineCombinerPattern::FNMSUB);
+ Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
+ : RISCVMachineCombinerPattern::FNMSUB);
Added = true;
}
return Added;
}
static bool getFPPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) {
return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
}
@@ -1832,9 +1831,8 @@ static unsigned getSHXADDShiftAmount(unsigned Opc) {
// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
// (sh3add (sh2add Y, Z), X).
-static bool
-getSHXADDPatterns(const MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+static bool getSHXADDPatterns(const MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns) {
unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
if (!ShiftAmt)
return false;
@@ -1847,19 +1845,31 @@ getSHXADDPatterns(const MachineInstr &Root,
bool Found = false;
if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {
- Patterns.push_back(MachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
+ Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
Found = true;
}
if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {
- Patterns.push_back(MachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
+ Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
Found = true;
}
return Found;
}
+CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {
+ switch (Pattern) {
+ case RISCVMachineCombinerPattern::FMADD_AX:
+ case RISCVMachineCombinerPattern::FMADD_XA:
+ case RISCVMachineCombinerPattern::FMSUB:
+ case RISCVMachineCombinerPattern::FNMSUB:
+ return CombinerObjective::MustReduceDepth;
+ default:
+ return TargetInstrInfo::getCombinerObjective(Pattern);
+ }
+}
+
bool RISCVInstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const {
if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
@@ -1872,8 +1882,7 @@ bool RISCVInstrInfo::getMachineCombinerPatterns(
DoRegPressureReduce);
}
-static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc,
- MachineCombinerPattern Pattern) {
+static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
switch (RootOpc) {
default:
llvm_unreachable("Unexpected opcode");
@@ -1884,32 +1893,32 @@ static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc,
case RISCV::FADD_D:
return RISCV::FMADD_D;
case RISCV::FSUB_H:
- return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
- : RISCV::FNMSUB_H;
+ return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
+ : RISCV::FNMSUB_H;
case RISCV::FSUB_S:
- return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
- : RISCV::FNMSUB_S;
+ return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
+ : RISCV::FNMSUB_S;
case RISCV::FSUB_D:
- return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
- : RISCV::FNMSUB_D;
+ return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
+ : RISCV::FNMSUB_D;
}
}
-static unsigned getAddendOperandIdx(MachineCombinerPattern Pattern) {
+static unsigned getAddendOperandIdx(unsigned Pattern) {
switch (Pattern) {
default:
llvm_unreachable("Unexpected pattern");
- case MachineCombinerPattern::FMADD_AX:
- case MachineCombinerPattern::FMSUB:
+ case RISCVMachineCombinerPattern::FMADD_AX:
+ case RISCVMachineCombinerPattern::FMSUB:
return 2;
- case MachineCombinerPattern::FMADD_XA:
- case MachineCombinerPattern::FNMSUB:
+ case RISCVMachineCombinerPattern::FMADD_XA:
+ case RISCVMachineCombinerPattern::FNMSUB:
return 1;
}
}
static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
- MachineCombinerPattern Pattern,
+ unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs) {
MachineFunction *MF = Root.getMF();
@@ -2013,7 +2022,7 @@ genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
}
void RISCVInstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
@@ -2023,22 +2032,22 @@ void RISCVInstrInfo::genAlternativeCodeSequence(
TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
DelInstrs, InstrIdxForVirtReg);
return;
- case MachineCombinerPattern::FMADD_AX:
- case MachineCombinerPattern::FMSUB: {
+ case RISCVMachineCombinerPattern::FMADD_AX:
+ case RISCVMachineCombinerPattern::FMSUB: {
MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());
combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
return;
}
- case MachineCombinerPattern::FMADD_XA:
- case MachineCombinerPattern::FNMSUB: {
+ case RISCVMachineCombinerPattern::FMADD_XA:
+ case RISCVMachineCombinerPattern::FNMSUB: {
MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());
combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
return;
}
- case MachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
+ case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
return;
- case MachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
+ case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
return;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 81d9c9db783c02..70fe7da85be0e7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -49,6 +49,16 @@ unsigned getBrCond(CondCode CC);
} // end of namespace RISCVCC
+// RISCV MachineCombiner patterns
+enum RISCVMachineCombinerPattern : unsigned {
+ FMADD_AX = MachineCombinerPattern::TARGET_PATTERN_START,
+ FMADD_XA,
+ FMSUB,
+ FNMSUB,
+ SHXADD_ADD_SLLI_OP1,
+ SHXADD_ADD_SLLI_OP2,
+};
+
class RISCVInstrInfo : public RISCVGenInstrInfo {
public:
@@ -240,17 +250,18 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
MachineTraceStrategy getMachineCombinerTraceStrategy() const override;
- bool
- getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns,
- bool DoRegPressureReduce) const override;
+ CombinerObjective getCombinerObjective(unsigned Pattern) const override;
+
+ bool getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns,
+ bool DoRegPressureReduce) const override;
void
- finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
+ finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs) const override;
void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
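(Aside, not part of the commit: with patterns passed as unsigned, a target that needs special trace handling overrides getCombinerObjective and defers to TargetInstrInfo for values it does not recognize, as the PPC and RISC-V hunks above do. A hedged sketch of that shape, reusing the hypothetical names from the earlier aside.)

CombinerObjective
MyTargetInstrInfo::getCombinerObjective(unsigned Pattern) const {
  switch (Pattern) {
  case MyTargetMachineCombinerPattern::FUSED_MULADD_OP1:
  case MyTargetMachineCombinerPattern::FUSED_MULADD_OP2:
    // These rewrites only pay off if they shorten the dependence chain.
    return CombinerObjective::MustReduceDepth;
  default:
    // Generic REASSOC_* values keep the base-class classification.
    return TargetInstrInfo::getCombinerObjective(Pattern);
  }
}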
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a5b2e4895eded2..510b08f9901a22 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10578,7 +10578,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
}
bool X86InstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const {
unsigned Opc = Root.getOpcode();
switch (Opc) {
@@ -10587,7 +10587,7 @@ bool X86InstrInfo::getMachineCombinerPatterns(
case X86::VPDPWSSDYrr:
case X86::VPDPWSSDYrm: {
if (!Subtarget.hasFastDPWSSD()) {
- Patterns.push_back(MachineCombinerPattern::DPWSSD);
+ Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
return true;
}
break;
@@ -10599,8 +10599,8 @@ bool X86InstrInfo::getMachineCombinerPatterns(
case X86::VPDPWSSDZr:
case X86::VPDPWSSDZm: {
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
- Patterns.push_back(MachineCombinerPattern::DPWSSD);
- return true;
+ Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
+ return true;
}
break;
}
@@ -10700,7 +10700,7 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
}
void X86InstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
@@ -10710,7 +10710,7 @@ void X86InstrInfo::genAlternativeCodeSequence(
TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
DelInstrs, InstrIdxForVirtReg);
return;
- case MachineCombinerPattern::DPWSSD:
+ case X86MachineCombinerPattern::DPWSSD:
genAlternativeDpCodeSequence(Root, *this, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
return;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e719be0caf3ee1..5407ede69a91ca 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -26,6 +26,12 @@
namespace llvm {
class X86Subtarget;
+// X86 MachineCombiner patterns
+enum X86MachineCombinerPattern : unsigned {
+ // X86 VNNI
+ DPWSSD = MachineCombinerPattern::TARGET_PATTERN_START,
+};
+
namespace X86 {
enum AsmComments {
@@ -607,16 +613,15 @@ class X86InstrInfo final : public X86GenInstrInfo {
std::optional<DestSourcePair>
isCopyInstrImpl(const MachineInstr &MI) const override;
- bool
- getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns,
- bool DoRegPressureReduce) const override;
+ bool getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns,
+ bool DoRegPressureReduce) const override;
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence.
void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;