[llvm] r242279 - [PowerPC] Use the MachineCombiner to reassociate fadd/fmul
Hal Finkel
hfinkel at anl.gov
Wed Jul 15 01:23:05 PDT 2015
Author: hfinkel
Date: Wed Jul 15 03:23:05 2015
New Revision: 242279
URL: http://llvm.org/viewvc/llvm-project?rev=242279&view=rev
Log:
[PowerPC] Use the MachineCombiner to reassociate fadd/fmul
This is a direct port of the code from the X86 backend (r239486/r240361), which
uses the MachineCombiner to reassociate (floating-point) adds/muls to increase
ILP, to the PowerPC backend. The rationale is the same.
There is a lot of copy-and-paste here between the X86 code and the PowerPC
code, and we should extract at least some of this into CodeGen somewhere.
However, I don't want to do that until this code is enhanced to handle FMAs as
well. After that, we'll be in a better position to extract the common parts.
Added:
llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=242279&r1=242278&r2=242279&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Wed Jul 15 03:23:05 2015
@@ -144,6 +144,9 @@ int PPCInstrInfo::getOperandLatency(cons
int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
UseMI, UseIdx);
+ if (!DefMI->getParent())
+ return Latency;
+
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
@@ -186,6 +189,265 @@ int PPCInstrInfo::getOperandLatency(cons
return Latency;
}
+static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
+ const MachineBasicBlock *MBB) {
+ const MachineOperand &Op1 = Inst.getOperand(1);
+ const MachineOperand &Op2 = Inst.getOperand(2);
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // We need virtual register definitions.
+ MachineInstr *MI1 = nullptr;
+ MachineInstr *MI2 = nullptr;
+ if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+ if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+ // And they need to be in the trace (otherwise, they won't have a depth).
+ if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB)
+ return true;
+
+ return false;
+}
+
+static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
+ MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
+ unsigned AssocOpcode = Inst.getOpcode();
+
+ // If only one operand has the same opcode and it's the second source operand,
+ // the operands must be commuted.
+ Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+ if (Commuted)
+ std::swap(MI1, MI2);
+
+ // 1. The previous instruction must be the same type as Inst.
+ // 2. The previous instruction must have virtual register definitions for its
+ // operands in the same basic block as Inst.
+ // 3. The previous instruction's result must only be used by Inst.
+ if (MI1->getOpcode() == AssocOpcode &&
+ hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
+ MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
+ return true;
+
+ return false;
+}
+
+// This function does not list all associative and commutative operations, but
+// only those worth feeding through the machine combiner in an attempt to
+// reduce the critical path. Mostly, this means floating-point operations,
+// because they have high latencies (compared to other operations, such and
+// and/or, which are also associative and commutative, but have low latencies).
+//
+// The concept is that these operations can benefit from this kind of
+// transformation:
+//
+// A = ? op ?
+// B = A op X
+// C = B op Y
+// -->
+// A = ? op ?
+// B = X op Y
+// C = A op B
+//
+// breaking the dependency between A and B, allowing them to be executed in
+// parallel (or back-to-back in a pipeline) instead of depending on each other.
+static bool isAssociativeAndCommutative(unsigned Opcode) {
+ switch (Opcode) {
+ // FP Add:
+ case PPC::FADD:
+ case PPC::FADDS:
+ // FP Multiply:
+ case PPC::FMUL:
+ case PPC::FMULS:
+ // Altivec Add:
+ case PPC::VADDFP:
+ // VSX Add:
+ case PPC::XSADDDP:
+ case PPC::XVADDDP:
+ case PPC::XVADDSP:
+ case PPC::XSADDSP:
+ // VSX Multiply:
+ case PPC::XSMULDP:
+ case PPC::XVMULDP:
+ case PPC::XVMULSP:
+ case PPC::XSMULSP:
+ // QPX Add:
+ case PPC::QVFADD:
+ case PPC::QVFADDS:
+ case PPC::QVFADDSs:
+ // QPX Multiply:
+ case PPC::QVFMUL:
+ case PPC::QVFMULS:
+ case PPC::QVFMULSs:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Return true if the input instruction is part of a chain of dependent ops
+/// that are suitable for reassociation, otherwise return false.
+/// If the instruction's operands must be commuted to have a previous
+/// instruction of the same type define the first source operand, Commuted will
+/// be set to true.
+static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) {
+ // 1. The operation must be associative and commutative.
+ // 2. The instruction must have virtual register definitions for its
+ // operands in the same basic block.
+ // 3. The instruction must have a reassociable sibling.
+ if (isAssociativeAndCommutative(Inst.getOpcode()) &&
+ hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
+ hasReassocSibling(Inst, Commuted))
+ return true;
+
+ return false;
+}
+
+bool PPCInstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
+ // Using the machine combiner in this way is potentially expensive, so
+ // restrict to when aggressive optimizations are desired.
+ if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
+ return false;
+
+ // FP reassociation is only legal when we don't need strict IEEE semantics.
+ if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
+ return false;
+
+ // Look for this reassociation pattern:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+
+ // FIXME: We should also match FMA operations here, where we consider the
+ // 'part' of the FMA, either the addition or the multiplication, paired with
+ // an actual addition or multiplication.
+
+ bool Commute;
+ if (isReassocCandidate(Root, Commute)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP. Specify each commutation
+ // possibility for the Prev instruction in the sequence and let the
+ // machine combiner decide if changing the operands is worthwhile.
+ if (Commute) {
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
+ } else {
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// Attempt the following reassociation to reduce critical path length:
+/// B = A op X (Prev)
+/// C = B op Y (Root)
+/// ===>
+/// B = X op Y
+/// C = A op B
+static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+ MachineFunction *MF = Root.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+ // This array encodes the operand index for each parameter because the
+ // operands may be commuted. Each row corresponds to a pattern value,
+ // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = {
+ { 1, 1, 2, 2 },
+ { 1, 2, 2, 1 },
+ { 2, 1, 1, 2 },
+ { 2, 2, 1, 1 }
+ };
+
+ MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
+ MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
+ MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
+ MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
+ MachineOperand &OpC = Root.getOperand(0);
+
+ unsigned RegA = OpA.getReg();
+ unsigned RegB = OpB.getReg();
+ unsigned RegX = OpX.getReg();
+ unsigned RegY = OpY.getReg();
+ unsigned RegC = OpC.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA))
+ MRI.constrainRegClass(RegA, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI.constrainRegClass(RegB, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegX))
+ MRI.constrainRegClass(RegX, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegY))
+ MRI.constrainRegClass(RegY, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegC))
+ MRI.constrainRegClass(RegC, RC);
+
+ // Create a new virtual register for the result of (X op Y) instead of
+ // recycling RegB because the MachineCombiner's computation of the critical
+ // path requires a new register definition rather than an existing one.
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ unsigned Opcode = Root.getOpcode();
+ bool KillA = OpA.isKill();
+ bool KillX = OpX.isKill();
+ bool KillY = OpY.isKill();
+
+ // Create new instructions for insertion.
+ MachineInstrBuilder MIB1 =
+ BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegY, getKillRegState(KillY));
+ InsInstrs.push_back(MIB1);
+
+ MachineInstrBuilder MIB2 =
+ BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
+ .addReg(RegA, getKillRegState(KillA))
+ .addReg(NewVR, getKillRegState(true));
+ InsInstrs.push_back(MIB2);
+
+ // Record old instructions for deletion.
+ DelInstrs.push_back(&Prev);
+ DelInstrs.push_back(&Root);
+}
+
+void PPCInstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root,
+ MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+
+ // Select the previous instruction in the sequence based on the input pattern.
+ MachineInstr *Prev = nullptr;
+ switch (Pattern) {
+ case MachineCombinerPattern::MC_REASSOC_AX_BY:
+ case MachineCombinerPattern::MC_REASSOC_XA_BY:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ break;
+ case MachineCombinerPattern::MC_REASSOC_AX_YB:
+ case MachineCombinerPattern::MC_REASSOC_XA_YB:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ }
+ assert(Prev && "Unknown pattern for machine combiner");
+
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+ return;
+}
+
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h?rev=242279&r1=242278&r2=242279&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h Wed Jul 15 03:23:05 2015
@@ -63,6 +63,19 @@ enum PPC970_Unit {
};
} // end namespace PPCII
+namespace MachineCombinerPattern {
+enum MC_PATTERN : int {
+ // These are commutative variants for reassociating a computation chain
+ // of the form:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+ MC_REASSOC_AX_BY = 0,
+ MC_REASSOC_AX_YB = 1,
+ MC_REASSOC_XA_BY = 2,
+ MC_REASSOC_XA_YB = 3,
+};
+} // end namespace MachineCombinerPattern
+
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
@@ -119,6 +132,25 @@ public:
return false;
}
+ bool useMachineCombiner() const override {
+ return true;
+ }
+
+ /// Return true when there is potentially a faster code sequence
+ /// for an instruction chain ending in <Root>. All potential patterns are
+ /// output in the <Pattern> array.
+ bool getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &P) const override;
+
+ /// When getMachineCombinerPatterns() finds a pattern, this function generates
+ /// the instructions that could replace the original code sequence.
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const override;
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp?rev=242279&r1=242278&r2=242279&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp Wed Jul 15 03:23:05 2015
@@ -57,6 +57,11 @@ EnableExtraTOCRegDeps("enable-ppc-extra-
cl::desc("Add extra TOC register dependencies"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+EnableMachineCombinerPass("ppc-machine-combiner",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -316,6 +321,10 @@ bool PPCPassConfig::addPreISel() {
bool PPCPassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
+
+ if (EnableMachineCombinerPass)
+ addPass(&MachineCombinerID);
+
return true;
}
Added: llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll?rev=242279&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/machine-combiner.ll Wed Jul 15 03:23:05 2015
@@ -0,0 +1,188 @@
+; RUN: llc -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
+; RUN: llc -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Verify that the first two adds are independent regardless of how the inputs are
+; commuted. The destination registers are used as source registers for the third add.
+
+define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG1:[0-9]+]], 3, 4
+; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG1:[0-9]+]], 3, 4
+; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG1:[0-9]+]], 3, 4
+; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG1:[0-9]+]], 3, 4
+; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
+; produced because that would cost more compile time.
+
+define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG12:[0-9]+]], 5, 6
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG11:[0-9]+]], 3, 4
+; CHECK: fadds [[REG13:[0-9]+]], [[REG12]], 7
+; CHECK: fadds [[REG1:[0-9]+]], [[REG0]], [[REG11]]
+; CHECK: fadds [[REG2:[0-9]+]], [[REG1]], [[REG13]]
+; CHECK: fadds 1, [[REG2]], 8
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ %t3 = fadd float %t2, %x4
+ %t4 = fadd float %t3, %x5
+ %t5 = fadd float %t4, %x6
+ %t6 = fadd float %t5, %x7
+ ret float %t6
+}
+
+; Verify that we reassociate vector instructions too.
+
+define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds1:
+; CHECK: # BB#0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
+; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %t0, %x2
+ %t2 = fadd <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds2:
+; CHECK: # BB#0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
+; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %x2, %t0
+ %t2 = fadd <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds3:
+; CHECK: # BB#0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
+; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %t0, %x2
+ %t2 = fadd <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds4:
+; CHECK: # BB#0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
+; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %x2, %t0
+ %t2 = fadd <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
+define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
+ %t0 = fdiv float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
+ %t0 = fdiv float %x0, %x1
+ %t1 = fmul float %x2, %t0
+ %t2 = fmul float %x3, %t1
+ ret float %t2
+}
+
+define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
+ %t0 = fdiv double %x0, %x1
+ %t1 = fadd double %x2, %t0
+ %t2 = fadd double %x3, %t1
+ ret double %t2
+}
+
+define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
+ %t0 = fdiv double %x0, %x1
+ %t1 = fmul double %x2, %t0
+ %t2 = fmul double %x3, %t1
+ ret double %t2
+}
+
+
More information about the llvm-commits
mailing list