[llvm] 6c32a1f - [SystemZ] Enable MachineCombiner for FP reassociation (#83546)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 30 08:09:59 PDT 2024
Author: Jonas Paulsson
Date: 2024-04-30T17:09:54+02:00
New Revision: 6c32a1fdf712e58a324fc0f6e3dfc83ed7d56b1e
URL: https://github.com/llvm/llvm-project/commit/6c32a1fdf712e58a324fc0f6e3dfc83ed7d56b1e
DIFF: https://github.com/llvm/llvm-project/commit/6c32a1fdf712e58a324fc0f6e3dfc83ed7d56b1e.diff
LOG: [SystemZ] Enable MachineCombiner for FP reassociation (#83546)
Enable MachineCombining for FP add, sub and mul.
In order for this to work, the default instruction selection of reg/mem opcodes is disabled for ISD nodes that carry the flags that allow reassociation. The reg/mem folding is instead done after MachineCombiner by PeepholeOptimizer. SystemZInstrInfo optimizeLoadInstr() and foldMemoryOperandImpl() ("LoadMI version") have been implemented for this purpose also by this patch.
Added:
llvm/test/CodeGen/SystemZ/foldmem-peep.mir
llvm/test/CodeGen/SystemZ/machine-combiner-reassoc-fp.ll
Modified:
llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
llvm/lib/Target/SystemZ/SystemZInstrFP.td
llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
llvm/lib/Target/SystemZ/SystemZInstrInfo.h
llvm/lib/Target/SystemZ/SystemZInstrVector.td
llvm/lib/Target/SystemZ/SystemZOperators.td
llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
llvm/test/CodeGen/SystemZ/anyregcc.ll
llvm/test/CodeGen/SystemZ/fp-add-01.ll
llvm/test/CodeGen/SystemZ/fp-add-02.ll
llvm/test/CodeGen/SystemZ/fp-mul-01.ll
llvm/test/CodeGen/SystemZ/fp-mul-03.ll
llvm/test/CodeGen/SystemZ/fp-sub-01.ll
llvm/test/CodeGen/SystemZ/fp-sub-02.ll
llvm/test/CodeGen/SystemZ/stackmap.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index deaf3dcaeb92a4..1bf23c2e8e4127 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -350,6 +350,11 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// Try to expand a boolean SELECT_CCMASK using an IPM sequence.
SDValue expandSelectBoolean(SDNode *Node);
+ // Return true if the flags of N and the subtarget allows for
+ // reassociation, in which case a reg/reg opcode is needed as input to the
+ // MachineCombiner.
+ bool shouldSelectForReassoc(SDNode *N) const;
+
public:
static char ID;
@@ -2044,6 +2049,15 @@ SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) {
return Result;
}
+bool SystemZDAGToDAGISel::shouldSelectForReassoc(SDNode *N) const {
+ EVT VT = N->getValueType(0);
+ assert(VT.isFloatingPoint() && "Expected FP SDNode");
+ return N->getFlags().hasAllowReassociation() &&
+ N->getFlags().hasNoSignedZeros() && Subtarget->hasVector() &&
+ (VT != MVT::f32 || Subtarget->hasVectorEnhancements1()) &&
+ !N->isStrictFPOpcode();
+}
+
void SystemZDAGToDAGISel::PreprocessISelDAG() {
// If we have conditional immediate loads, we always prefer
// using those over an IPM sequence.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index f4b5aeaebef923..aad04a2b4159cb 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -430,8 +430,10 @@ let Uses = [FPC], mayRaiseFPException = 1,
def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
}
- defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, z_load, 4>;
- defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, z_load, 8>;
+ defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, z_any_fadd_noreassoc, FP32,
+ z_load, 4>;
+ defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, z_any_fadd_noreassoc, FP64,
+ z_load, 8>;
}
// Subtraction.
@@ -441,8 +443,10 @@ let Uses = [FPC], mayRaiseFPException = 1,
def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
- defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, z_load, 4>;
- defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, z_load, 8>;
+ defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, z_any_fsub_noreassoc, FP32,
+ z_load, 4>;
+ defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, z_any_fsub_noreassoc, FP64,
+ z_load, 8>;
}
// Multiplication.
@@ -452,8 +456,10 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
}
- defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, z_load, 4>;
- defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, z_load, 8>;
+ defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, z_any_fmul_noreassoc, FP32,
+ z_load, 4>;
+ defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, z_any_fmul_noreassoc, FP64,
+ z_load, 8>;
}
// f64 multiplication of two FP32 registers.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 6b75c30943b40a..2b61cff727cdc7 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -610,6 +610,32 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
.addImm(CCValid).addImm(CCMask);
}
+MachineInstr *SystemZInstrInfo::optimizeLoadInstr(MachineInstr &MI,
+ const MachineRegisterInfo *MRI,
+ Register &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
+ // Check whether we can move the DefMI load, and that it only has one use.
+ DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
+ assert(DefMI);
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(nullptr, SawStore) ||
+ !MRI->hasOneNonDBGUse(FoldAsLoadDefReg))
+ return nullptr;
+
+ int UseOpIdx =
+ MI.findRegisterUseOperandIdx(FoldAsLoadDefReg, /*TRI=*/nullptr);
+ assert(UseOpIdx != -1 && "Expected FoldAsLoadDefReg to be used by MI.");
+
+ // Check whether we can fold the load.
+ if (MachineInstr *FoldMI =
+ foldMemoryOperand(MI, {((unsigned)UseOpIdx)}, *DefMI)) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
+ }
+
+ return nullptr;
+}
+
bool SystemZInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Register Reg,
MachineRegisterInfo *MRI) const {
@@ -1004,6 +1030,67 @@ SystemZInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
return nullptr;
}
+bool SystemZInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const {
+ unsigned Opc = Inst.getOpcode();
+ if (Invert) {
+ auto InverseOpcode = getInverseOpcode(Opc);
+ if (!InverseOpcode)
+ return false;
+ Opc = *InverseOpcode;
+ }
+
+ switch (Opc) {
+ default:
+ break;
+ // Adds and multiplications.
+ case SystemZ::WFADB:
+ case SystemZ::WFASB:
+ case SystemZ::WFAXB:
+ case SystemZ::VFADB:
+ case SystemZ::VFASB:
+ case SystemZ::WFMDB:
+ case SystemZ::WFMSB:
+ case SystemZ::WFMXB:
+ case SystemZ::VFMDB:
+ case SystemZ::VFMSB:
+ return (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz));
+ }
+
+ return false;
+}
+
+std::optional<unsigned>
+SystemZInstrInfo::getInverseOpcode(unsigned Opcode) const {
+ // fadd => fsub
+ switch (Opcode) {
+ case SystemZ::WFADB:
+ return SystemZ::WFSDB;
+ case SystemZ::WFASB:
+ return SystemZ::WFSSB;
+ case SystemZ::WFAXB:
+ return SystemZ::WFSXB;
+ case SystemZ::VFADB:
+ return SystemZ::VFSDB;
+ case SystemZ::VFASB:
+ return SystemZ::VFSSB;
+ // fsub => fadd
+ case SystemZ::WFSDB:
+ return SystemZ::WFADB;
+ case SystemZ::WFSSB:
+ return SystemZ::WFASB;
+ case SystemZ::WFSXB:
+ return SystemZ::WFAXB;
+ case SystemZ::VFSDB:
+ return SystemZ::VFADB;
+ case SystemZ::VFSSB:
+ return SystemZ::VFASB;
+ default:
+ return std::nullopt;
+ }
+}
+
MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
@@ -1338,7 +1425,83 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
LiveIntervals *LIS) const {
- return nullptr;
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MachineBasicBlock *MBB = MI.getParent();
+
+ // For reassociable FP operations, any loads have been purposefully left
+ // unfolded so that MachineCombiner can do its work on reg/reg
+ // opcodes. After that, as many loads as possible are now folded.
+ // TODO: This may be beneficial with other opcodes as well as machine-sink
+ // can move loads close to their user in a different MBB, which the isel
+ // matcher did not see.
+ unsigned LoadOpc = 0;
+ unsigned RegMemOpcode = 0;
+ const TargetRegisterClass *FPRC = nullptr;
+ RegMemOpcode = MI.getOpcode() == SystemZ::WFADB ? SystemZ::ADB
+ : MI.getOpcode() == SystemZ::WFSDB ? SystemZ::SDB
+ : MI.getOpcode() == SystemZ::WFMDB ? SystemZ::MDB
+ : 0;
+ if (RegMemOpcode) {
+ LoadOpc = SystemZ::VL64;
+ FPRC = &SystemZ::FP64BitRegClass;
+ } else {
+ RegMemOpcode = MI.getOpcode() == SystemZ::WFASB ? SystemZ::AEB
+ : MI.getOpcode() == SystemZ::WFSSB ? SystemZ::SEB
+ : MI.getOpcode() == SystemZ::WFMSB ? SystemZ::MEEB
+ : 0;
+ if (RegMemOpcode) {
+ LoadOpc = SystemZ::VL32;
+ FPRC = &SystemZ::FP32BitRegClass;
+ }
+ }
+ if (!RegMemOpcode || LoadMI.getOpcode() != LoadOpc)
+ return nullptr;
+
+ // If RegMemOpcode clobbers CC, first make sure CC is not live at this point.
+ if (get(RegMemOpcode).hasImplicitDefOfPhysReg(SystemZ::CC)) {
+ assert(LoadMI.getParent() == MI.getParent() && "Assuming a local fold.");
+ assert(LoadMI != InsertPt && "Assuming InsertPt not to be first in MBB.");
+ for (MachineBasicBlock::iterator MII = std::prev(InsertPt);;
+ --MII) {
+ if (MII->definesRegister(SystemZ::CC, /*TRI=*/nullptr)) {
+ if (!MII->registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
+ return nullptr;
+ break;
+ }
+ if (MII == MBB->begin()) {
+ if (MBB->isLiveIn(SystemZ::CC))
+ return nullptr;
+ break;
+ }
+ }
+ }
+
+ Register FoldAsLoadDefReg = LoadMI.getOperand(0).getReg();
+ if (Ops.size() != 1 || FoldAsLoadDefReg != MI.getOperand(Ops[0]).getReg())
+ return nullptr;
+ Register DstReg = MI.getOperand(0).getReg();
+ MachineOperand LHS = MI.getOperand(1);
+ MachineOperand RHS = MI.getOperand(2);
+ MachineOperand &RegMO = RHS.getReg() == FoldAsLoadDefReg ? LHS : RHS;
+ if ((RegMemOpcode == SystemZ::SDB || RegMemOpcode == SystemZ::SEB) &&
+ FoldAsLoadDefReg != RHS.getReg())
+ return nullptr;
+
+ MachineOperand &Base = LoadMI.getOperand(1);
+ MachineOperand &Disp = LoadMI.getOperand(2);
+ MachineOperand &Indx = LoadMI.getOperand(3);
+ MachineInstrBuilder MIB =
+ BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(RegMemOpcode), DstReg)
+ .add(RegMO)
+ .add(Base)
+ .add(Disp)
+ .add(Indx);
+ MIB->addRegisterDead(SystemZ::CC, &RI);
+ MRI->setRegClass(DstReg, FPRC);
+ MRI->setRegClass(RegMO.getReg(), FPRC);
+ transferMIFlag(&MI, MIB, MachineInstr::NoFPExcept);
+
+ return MIB;
}
bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index cdf07310108a96..aa10fb56496231 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -254,8 +254,13 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Cond, Register TrueReg,
Register FalseReg) const override;
+ MachineInstr *optimizeLoadInstr(MachineInstr &MI,
+ const MachineRegisterInfo *MRI,
+ Register &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const override;
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
+
bool isPredicable(const MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
@@ -285,6 +290,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
Register VReg) const override;
MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
LiveIntervals *LIS) const override;
+
+ bool useMachineCombiner() const override { return true; }
+ bool isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const override;
+ std::optional<unsigned> getInverseOpcode(unsigned Opcode) const override;
+
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 245e3c3399a986..c29c54a6cb79de 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -139,7 +139,7 @@ let Predicates = [FeatureVector] in {
// LEY and LDY offer full 20-bit displacement fields. It's often better
// to use those instructions rather than force a 20-bit displacement
// into a GPR temporary.
- let mayLoad = 1 in {
+ let mayLoad = 1, canFoldAsLoad = 1 in {
def VL32 : UnaryAliasVRX<z_load, v32sb, bdxaddr12pair>;
def VL64 : UnaryAliasVRX<z_load, v64db, bdxaddr12pair>;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 1611436b01b7fa..6cb89ccff85e68 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -829,6 +829,18 @@ def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
// Floating-point negative absolute.
def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
+// Floating-point operations which will not participate in reassociation, and
+// therefore are candidates for reg/mem folding during isel.
+def z_any_fadd_noreassoc : PatFrag<(ops node:$src1, node:$src2),
+ (any_fadd node:$src1, node:$src2),
+ [{ return !shouldSelectForReassoc(N); }]>;
+def z_any_fsub_noreassoc : PatFrag<(ops node:$src1, node:$src2),
+ (any_fsub node:$src1, node:$src2),
+ [{ return !shouldSelectForReassoc(N); }]>;
+def z_any_fmul_noreassoc : PatFrag<(ops node:$src1, node:$src2),
+ (any_fmul node:$src1, node:$src2),
+ [{ return !shouldSelectForReassoc(N); }]>;
+
// Strict floating-point fragments.
def z_any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
[(z_strict_fcmp node:$lhs, node:$rhs),
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 2491bd2ee2c12c..dced64d6b21ac7 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -30,6 +30,11 @@
using namespace llvm;
+static cl::opt<bool> EnableMachineCombinerPass(
+ "systemz-machine-combiner",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
// NOLINTNEXTLINE(readability-identifier-naming)
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
// Register the target.
@@ -245,6 +250,10 @@ bool SystemZPassConfig::addInstSelector() {
bool SystemZPassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
+
+ if (EnableMachineCombinerPass)
+ addPass(&MachineCombinerID);
+
return true;
}
diff --git a/llvm/test/CodeGen/SystemZ/anyregcc.ll b/llvm/test/CodeGen/SystemZ/anyregcc.ll
index 76b9352f30049e..8f477c929781cc 100644
--- a/llvm/test/CodeGen/SystemZ/anyregcc.ll
+++ b/llvm/test/CodeGen/SystemZ/anyregcc.ll
@@ -323,37 +323,37 @@ entry:
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
-; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 13
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
-; Loc 9: Register
-; CHECK-NEXT: .byte 1
+; Loc 9: IndirectMem
+; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
-; CHECK-NEXT: .long 0
-; Loc 10: Register
-; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .long 344
+; Loc 10: IndirectMem
+; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
-; CHECK-NEXT: .long 0
-; Loc 11: Register
-; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .long 352
+; Loc 11: IndirectMem
+; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
-; CHECK-NEXT: .long 0
-; Loc 12: Register
-; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .long 360
+; Loc 12: IndirectMem
+; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
-; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 368
define i64 @anyreg_test2(ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5, ptr %a6, ptr %a7, ptr %a8, ptr %a9, ptr %a10, ptr %a11, ptr %a12) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to ptr
diff --git a/llvm/test/CodeGen/SystemZ/foldmem-peep.mir b/llvm/test/CodeGen/SystemZ/foldmem-peep.mir
new file mode 100644
index 00000000000000..c6a244f4306161
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/foldmem-peep.mir
@@ -0,0 +1,105 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z16 -start-before=peephole-opt \
+# RUN: -stop-after=peephole-opt %s -o - | FileCheck %s
+
+--- |
+ define double @f1(ptr %x, i32 %a, i32 %b, i32 %limit, ptr %dst) #0 {
+ %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1
+ ret double 0.0
+ }
+ define double @f2(ptr %x, i32 %a, i32 %b, i32 %limit, ptr %dst) #0 {
+ %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1
+ ret double 0.0
+ }
+
+...
+
+# Do not fold where CC is live.
+# CHECK: name: f1
+# CHECK: {{.*}} WFADB
+---
+name: f1
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: addr64bit }
+ - { id: 1, class: gr32bit }
+ - { id: 2, class: gr32bit }
+ - { id: 3, class: gr32bit }
+ - { id: 4, class: addr64bit }
+ - { id: 5, class: vr64bit }
+ - { id: 6, class: vr64bit }
+ - { id: 7, class: vr64bit }
+ - { id: 8, class: grx32bit }
+liveins:
+ - { reg: '$r2d', virtual-reg: '%0' }
+ - { reg: '$r3l', virtual-reg: '%1' }
+ - { reg: '$r4l', virtual-reg: '%2' }
+ - { reg: '$r5l', virtual-reg: '%3' }
+ - { reg: '$r6d', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $r2d, $r3l, $r4l, $r5l, $r6d
+
+ %4:addr64bit = COPY $r6d
+ %3:gr32bit = COPY $r5l
+ %2:gr32bit = COPY $r4l
+ %1:gr32bit = COPY $r3l
+ %0:addr64bit = COPY $r2d
+ CLFIMux %3, 42, implicit-def $cc
+ %5:vr64bit = VL64 %0, 0, $noreg :: (load (s64) from %ir.x)
+ %6:vr64bit = VL64 %0, 8, $noreg :: (load (s64) from %ir.arrayidx1)
+ %7:vr64bit = nsz arcp contract afn reassoc nofpexcept WFADB killed %6, killed %5, implicit $fpc
+ %8:grx32bit = SELRMux %2, %1, 14, 4, implicit $cc
+ STMux killed %8, %4, 0, $noreg :: (store (s32) into %ir.dst)
+ $f0d = COPY %7
+ Return implicit $f0d
+
+...
+
+# Do not fold where CC is live in.
+# CHECK: name: f2
+# CHECK: {{.*}} WFADB
+---
+name: f2
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: addr64bit }
+ - { id: 1, class: gr32bit }
+ - { id: 2, class: gr32bit }
+ - { id: 3, class: gr32bit }
+ - { id: 4, class: addr64bit }
+ - { id: 5, class: vr64bit }
+ - { id: 6, class: vr64bit }
+ - { id: 7, class: vr64bit }
+ - { id: 8, class: grx32bit }
+liveins:
+ - { reg: '$r2d', virtual-reg: '%0' }
+ - { reg: '$r3l', virtual-reg: '%1' }
+ - { reg: '$r4l', virtual-reg: '%2' }
+ - { reg: '$r5l', virtual-reg: '%3' }
+ - { reg: '$r6d', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $r2d, $r3l, $r4l, $r5l, $r6d, $cc
+
+ %4:addr64bit = COPY $r6d
+ %3:gr32bit = COPY $r5l
+ %2:gr32bit = COPY $r4l
+ %1:gr32bit = COPY $r3l
+ %0:addr64bit = COPY $r2d
+ %5:vr64bit = VL64 %0, 0, $noreg :: (load (s64) from %ir.x)
+ %6:vr64bit = VL64 %0, 8, $noreg :: (load (s64) from %ir.arrayidx1)
+ %7:vr64bit = nsz arcp contract afn reassoc nofpexcept WFADB killed %6, killed %5, implicit $fpc
+ %8:grx32bit = SELRMux %2, %1, 14, 4, implicit $cc
+ STMux killed %8, %4, 0, $noreg :: (store (s32) into %ir.dst)
+ $f0d = COPY %7
+ Return implicit $f0d
+
+...
diff --git a/llvm/test/CodeGen/SystemZ/fp-add-01.ll b/llvm/test/CodeGen/SystemZ/fp-add-01.ll
index f60fb8345b4a48..eb845bae9b804b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-add-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-add-01.ll
@@ -119,3 +119,15 @@ define float @f7(ptr %ptr0) {
ret float %add10
}
+
+; Check that reassociation flags do not get in the way of AEB.
+define float @f8(ptr %x) {
+; CHECK-LABEL: f8:
+; CHECK: aeb %f0
+entry:
+ %0 = load float, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds float, ptr %x, i64 1
+ %1 = load float, ptr %arrayidx1, align 8
+ %add = fadd reassoc nsz arcp contract afn float %1, %0
+ ret float %add
+}
diff --git a/llvm/test/CodeGen/SystemZ/fp-add-02.ll b/llvm/test/CodeGen/SystemZ/fp-add-02.ll
index bb12196fb848a5..7866f98240eab3 100644
--- a/llvm/test/CodeGen/SystemZ/fp-add-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-add-02.ll
@@ -118,3 +118,17 @@ define double @f7(ptr %ptr0) {
ret double %add10
}
+
+; Check that reassociation flags do not get in the way of ADB.
+define double @f8(ptr %x) {
+; CHECK-LABEL: f8:
+; CHECK: ld %f0
+; CHECK: adb %f0
+; CHECK: br %r14
+entry:
+ %0 = load double, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1
+ %1 = load double, ptr %arrayidx1, align 8
+ %add = fadd reassoc nsz arcp contract afn double %1, %0
+ ret double %add
+}
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-01.ll b/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
index 144e3208c5eb75..c5e66ff72c2a40 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
@@ -119,3 +119,15 @@ define float @f7(ptr %ptr0) {
ret float %mul10
}
+
+; Check that reassociation flags do not get in the way of MEEB.
+define float @f8(ptr %x) {
+; CHECK-LABEL: f8:
+; CHECK: meeb %f0
+entry:
+ %0 = load float, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds float, ptr %x, i64 1
+ %1 = load float, ptr %arrayidx1, align 8
+ %add = fmul reassoc nsz arcp contract afn float %1, %0
+ ret float %add
+}
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-03.ll b/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
index dbd6975af41304..820fdbd6f5bdb2 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
@@ -119,3 +119,17 @@ define double @f7(ptr %ptr0) {
ret double %mul10
}
+
+; Check that reassociation flags do not get in the way of MDB.
+define double @f8(ptr %x) {
+; CHECK-LABEL: f8:
+; CHECK: ld %f0
+; CHECK: mdb %f0
+; CHECK: br %r14
+entry:
+ %0 = load double, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1
+ %1 = load double, ptr %arrayidx1, align 8
+ %add = fmul reassoc nsz arcp contract afn double %1, %0
+ ret double %add
+}
diff --git a/llvm/test/CodeGen/SystemZ/fp-sub-01.ll b/llvm/test/CodeGen/SystemZ/fp-sub-01.ll
index a6e01112619ee8..e875fa3be735b0 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sub-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sub-01.ll
@@ -119,3 +119,15 @@ define float @f7(ptr %ptr0) {
ret float %sub10
}
+
+; Check that reassociation flags do not get in the way of SEB.
+define float @f8(ptr %x) {
+; CHECK-LABEL: f8:
+; CHECK: seb %f0
+entry:
+ %0 = load float, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds float, ptr %x, i64 1
+ %1 = load float, ptr %arrayidx1, align 8
+ %add = fsub reassoc nsz arcp contract afn float %1, %0
+ ret float %add
+}
diff --git a/llvm/test/CodeGen/SystemZ/fp-sub-02.ll b/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
index c564c2de31887d..3219b6e4be8f34 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
@@ -119,3 +119,17 @@ define double @f7(ptr %ptr0) {
ret double %sub10
}
+
+; Check that reassociation flags do not get in the way of SDB.
+define double @f8(ptr %x) {
+; CHECK-LABEL: f8:
+; CHECK: ld %f0
+; CHECK: sdb %f0
+; CHECK: br %r14
+entry:
+ %0 = load double, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1
+ %1 = load double, ptr %arrayidx1, align 8
+ %add = fsub reassoc nsz arcp contract afn double %1, %0
+ ret double %add
+}
diff --git a/llvm/test/CodeGen/SystemZ/machine-combiner-reassoc-fp.ll b/llvm/test/CodeGen/SystemZ/machine-combiner-reassoc-fp.ll
new file mode 100644
index 00000000000000..fdf1be68a5430e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/machine-combiner-reassoc-fp.ll
@@ -0,0 +1,653 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 -verify-machineinstrs -O3 \
+; RUN: | FileCheck %s
+
+; Test reassociation of fp add, subtract and multiply.
+
+define double @fun0_fadd(ptr %x) {
+; CHECK-LABEL: fun0_fadd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld %f0, 0(%r2)
+; CHECK-NEXT: adb %f0, 8(%r2)
+; CHECK-NEXT: ld %f1, 24(%r2)
+; CHECK-NEXT: adb %f1, 16(%r2)
+; CHECK-NEXT: adbr %f0, %f1
+; CHECK-NEXT: ld %f1, 40(%r2)
+; CHECK-NEXT: adb %f1, 32(%r2)
+; CHECK-NEXT: adb %f1, 48(%r2)
+; CHECK-NEXT: adbr %f0, %f1
+; CHECK-NEXT: adb %f0, 56(%r2)
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load double, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1
+ %1 = load double, ptr %arrayidx1, align 8
+ %add = fadd reassoc nsz arcp contract afn double %1, %0
+ %arrayidx2 = getelementptr inbounds double, ptr %x, i64 2
+ %2 = load double, ptr %arrayidx2, align 8
+ %add3 = fadd reassoc nsz arcp contract afn double %add, %2
+ %arrayidx4 = getelementptr inbounds double, ptr %x, i64 3
+ %3 = load double, ptr %arrayidx4, align 8
+ %add5 = fadd reassoc nsz arcp contract afn double %add3, %3
+ %arrayidx6 = getelementptr inbounds double, ptr %x, i64 4
+ %4 = load double, ptr %arrayidx6, align 8
+ %add7 = fadd reassoc nsz arcp contract afn double %add5, %4
+ %arrayidx8 = getelementptr inbounds double, ptr %x, i64 5
+ %5 = load double, ptr %arrayidx8, align 8
+ %add9 = fadd reassoc nsz arcp contract afn double %add7, %5
+ %arrayidx10 = getelementptr inbounds double, ptr %x, i64 6
+ %6 = load double, ptr %arrayidx10, align 8
+ %add11 = fadd reassoc nsz arcp contract afn double %add9, %6
+ %arrayidx12 = getelementptr inbounds double, ptr %x, i64 7
+ %7 = load double, ptr %arrayidx12, align 8
+ %add13 = fadd reassoc nsz arcp contract afn double %add11, %7
+ ret double %add13
+}
+
+define float @fun1_fadd(ptr %x) {
+; CHECK-LABEL: fun1_fadd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lde %f0, 0(%r2)
+; CHECK-NEXT: aeb %f0, 4(%r2)
+; CHECK-NEXT: lde %f1, 12(%r2)
+; CHECK-NEXT: aeb %f1, 8(%r2)
+; CHECK-NEXT: aebr %f0, %f1
+; CHECK-NEXT: lde %f1, 20(%r2)
+; CHECK-NEXT: aeb %f1, 16(%r2)
+; CHECK-NEXT: aeb %f1, 24(%r2)
+; CHECK-NEXT: aebr %f0, %f1
+; CHECK-NEXT: aeb %f0, 28(%r2)
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load float, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds float, ptr %x, i64 1
+ %1 = load float, ptr %arrayidx1, align 8
+ %add = fadd reassoc nsz arcp contract afn float %1, %0
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 2
+ %2 = load float, ptr %arrayidx2, align 8
+ %add3 = fadd reassoc nsz arcp contract afn float %add, %2
+ %arrayidx4 = getelementptr inbounds float, ptr %x, i64 3
+ %3 = load float, ptr %arrayidx4, align 8
+ %add5 = fadd reassoc nsz arcp contract afn float %add3, %3
+ %arrayidx6 = getelementptr inbounds float, ptr %x, i64 4
+ %4 = load float, ptr %arrayidx6, align 8
+ %add7 = fadd reassoc nsz arcp contract afn float %add5, %4
+ %arrayidx8 = getelementptr inbounds float, ptr %x, i64 5
+ %5 = load float, ptr %arrayidx8, align 8
+ %add9 = fadd reassoc nsz arcp contract afn float %add7, %5
+ %arrayidx10 = getelementptr inbounds float, ptr %x, i64 6
+ %6 = load float, ptr %arrayidx10, align 8
+ %add11 = fadd reassoc nsz arcp contract afn float %add9, %6
+ %arrayidx12 = getelementptr inbounds float, ptr %x, i64 7
+ %7 = load float, ptr %arrayidx12, align 8
+ %add13 = fadd reassoc nsz arcp contract afn float %add11, %7
+ ret float %add13
+}
+
+define fp128 @fun2_fadd(ptr %x) {
+; CHECK-LABEL: fun2_fadd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 16(%r3), 3
+; CHECK-NEXT: wfaxb %v0, %v1, %v0
+; CHECK-NEXT: vl %v1, 32(%r3), 3
+; CHECK-NEXT: vl %v2, 48(%r3), 3
+; CHECK-NEXT: wfaxb %v1, %v1, %v2
+; CHECK-NEXT: wfaxb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r3), 3
+; CHECK-NEXT: vl %v2, 80(%r3), 3
+; CHECK-NEXT: wfaxb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r3), 3
+; CHECK-NEXT: wfaxb %v1, %v1, %v2
+; CHECK-NEXT: wfaxb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r3), 3
+; CHECK-NEXT: wfaxb %v0, %v0, %v1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load fp128, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds fp128, ptr %x, i64 1
+ %1 = load fp128, ptr %arrayidx1, align 8
+ %add = fadd reassoc nsz arcp contract afn fp128 %1, %0
+ %arrayidx2 = getelementptr inbounds fp128, ptr %x, i64 2
+ %2 = load fp128, ptr %arrayidx2, align 8
+ %add3 = fadd reassoc nsz arcp contract afn fp128 %add, %2
+ %arrayidx4 = getelementptr inbounds fp128, ptr %x, i64 3
+ %3 = load fp128, ptr %arrayidx4, align 8
+ %add5 = fadd reassoc nsz arcp contract afn fp128 %add3, %3
+ %arrayidx6 = getelementptr inbounds fp128, ptr %x, i64 4
+ %4 = load fp128, ptr %arrayidx6, align 8
+ %add7 = fadd reassoc nsz arcp contract afn fp128 %add5, %4
+ %arrayidx8 = getelementptr inbounds fp128, ptr %x, i64 5
+ %5 = load fp128, ptr %arrayidx8, align 8
+ %add9 = fadd reassoc nsz arcp contract afn fp128 %add7, %5
+ %arrayidx10 = getelementptr inbounds fp128, ptr %x, i64 6
+ %6 = load fp128, ptr %arrayidx10, align 8
+ %add11 = fadd reassoc nsz arcp contract afn fp128 %add9, %6
+ %arrayidx12 = getelementptr inbounds fp128, ptr %x, i64 7
+ %7 = load fp128, ptr %arrayidx12, align 8
+ %add13 = fadd reassoc nsz arcp contract afn fp128 %add11, %7
+ ret fp128 %add13
+}
+
+define <2 x double> @fun3_fadd(ptr %x) {
+; CHECK-LABEL: fun3_fadd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 16(%r2), 3
+; CHECK-NEXT: vfadb %v0, %v1, %v0
+; CHECK-NEXT: vl %v1, 32(%r2), 3
+; CHECK-NEXT: vl %v2, 48(%r2), 3
+; CHECK-NEXT: vfadb %v1, %v1, %v2
+; CHECK-NEXT: vfadb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r2), 3
+; CHECK-NEXT: vl %v2, 80(%r2), 3
+; CHECK-NEXT: vfadb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r2), 3
+; CHECK-NEXT: vfadb %v1, %v1, %v2
+; CHECK-NEXT: vfadb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r2), 3
+; CHECK-NEXT: vfadb %v24, %v0, %v1
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load <2 x double>, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds <2 x double>, ptr %x, i64 1
+ %1 = load <2 x double>, ptr %arrayidx1, align 8
+ %add = fadd reassoc nsz arcp contract afn <2 x double> %1, %0
+ %arrayidx2 = getelementptr inbounds <2 x double>, ptr %x, i64 2
+ %2 = load <2 x double>, ptr %arrayidx2, align 8
+ %add3 = fadd reassoc nsz arcp contract afn <2 x double> %add, %2
+ %arrayidx4 = getelementptr inbounds <2 x double>, ptr %x, i64 3
+ %3 = load <2 x double>, ptr %arrayidx4, align 8
+ %add5 = fadd reassoc nsz arcp contract afn <2 x double> %add3, %3
+ %arrayidx6 = getelementptr inbounds <2 x double>, ptr %x, i64 4
+ %4 = load <2 x double>, ptr %arrayidx6, align 8
+ %add7 = fadd reassoc nsz arcp contract afn <2 x double> %add5, %4
+ %arrayidx8 = getelementptr inbounds <2 x double>, ptr %x, i64 5
+ %5 = load <2 x double>, ptr %arrayidx8, align 8
+ %add9 = fadd reassoc nsz arcp contract afn <2 x double> %add7, %5
+ %arrayidx10 = getelementptr inbounds <2 x double>, ptr %x, i64 6
+ %6 = load <2 x double>, ptr %arrayidx10, align 8
+ %add11 = fadd reassoc nsz arcp contract afn <2 x double> %add9, %6
+ %arrayidx12 = getelementptr inbounds <2 x double>, ptr %x, i64 7
+ %7 = load <2 x double>, ptr %arrayidx12, align 8
+ %add13 = fadd reassoc nsz arcp contract afn <2 x double> %add11, %7
+ ret <2 x double> %add13
+}
+
+define <4 x float> @fun4_fadd(ptr %x) {
+; CHECK-LABEL: fun4_fadd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 16(%r2), 3
+; CHECK-NEXT: vfasb %v0, %v1, %v0
+; CHECK-NEXT: vl %v1, 32(%r2), 3
+; CHECK-NEXT: vl %v2, 48(%r2), 3
+; CHECK-NEXT: vfasb %v1, %v1, %v2
+; CHECK-NEXT: vfasb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r2), 3
+; CHECK-NEXT: vl %v2, 80(%r2), 3
+; CHECK-NEXT: vfasb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r2), 3
+; CHECK-NEXT: vfasb %v1, %v1, %v2
+; CHECK-NEXT: vfasb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r2), 3
+; CHECK-NEXT: vfasb %v24, %v0, %v1
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load <4 x float>, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds <4 x float>, ptr %x, i64 1
+ %1 = load <4 x float>, ptr %arrayidx1, align 8
+ %add = fadd reassoc nsz arcp contract afn <4 x float> %1, %0
+ %arrayidx2 = getelementptr inbounds <4 x float>, ptr %x, i64 2
+ %2 = load <4 x float>, ptr %arrayidx2, align 8
+ %add3 = fadd reassoc nsz arcp contract afn <4 x float> %add, %2
+ %arrayidx4 = getelementptr inbounds <4 x float>, ptr %x, i64 3
+ %3 = load <4 x float>, ptr %arrayidx4, align 8
+ %add5 = fadd reassoc nsz arcp contract afn <4 x float> %add3, %3
+ %arrayidx6 = getelementptr inbounds <4 x float>, ptr %x, i64 4
+ %4 = load <4 x float>, ptr %arrayidx6, align 8
+ %add7 = fadd reassoc nsz arcp contract afn <4 x float> %add5, %4
+ %arrayidx8 = getelementptr inbounds <4 x float>, ptr %x, i64 5
+ %5 = load <4 x float>, ptr %arrayidx8, align 8
+ %add9 = fadd reassoc nsz arcp contract afn <4 x float> %add7, %5
+ %arrayidx10 = getelementptr inbounds <4 x float>, ptr %x, i64 6
+ %6 = load <4 x float>, ptr %arrayidx10, align 8
+ %add11 = fadd reassoc nsz arcp contract afn <4 x float> %add9, %6
+ %arrayidx12 = getelementptr inbounds <4 x float>, ptr %x, i64 7
+ %7 = load <4 x float>, ptr %arrayidx12, align 8
+ %add13 = fadd reassoc nsz arcp contract afn <4 x float> %add11, %7
+ ret <4 x float> %add13
+}
+
+define double @fun5_fsub(ptr %x) {
+; CHECK-LABEL: fun5_fsub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld %f0, 0(%r2)
+; CHECK-NEXT: sdb %f0, 8(%r2)
+; CHECK-NEXT: ld %f1, 24(%r2)
+; CHECK-NEXT: adb %f1, 16(%r2)
+; CHECK-NEXT: sdbr %f0, %f1
+; CHECK-NEXT: ld %f1, 40(%r2)
+; CHECK-NEXT: adb %f1, 32(%r2)
+; CHECK-NEXT: adb %f1, 48(%r2)
+; CHECK-NEXT: sdbr %f0, %f1
+; CHECK-NEXT: sdb %f0, 56(%r2)
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load double, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1
+ %1 = load double, ptr %arrayidx1, align 8
+ %sub = fsub reassoc nsz arcp contract afn double %0, %1
+ %arrayidx2 = getelementptr inbounds double, ptr %x, i64 2
+ %2 = load double, ptr %arrayidx2, align 8
+ %sub3 = fsub reassoc nsz arcp contract afn double %sub, %2
+ %arrayidx4 = getelementptr inbounds double, ptr %x, i64 3
+ %3 = load double, ptr %arrayidx4, align 8
+ %sub5 = fsub reassoc nsz arcp contract afn double %sub3, %3
+ %arrayidx6 = getelementptr inbounds double, ptr %x, i64 4
+ %4 = load double, ptr %arrayidx6, align 8
+ %sub7 = fsub reassoc nsz arcp contract afn double %sub5, %4
+ %arrayidx8 = getelementptr inbounds double, ptr %x, i64 5
+ %5 = load double, ptr %arrayidx8, align 8
+ %sub9 = fsub reassoc nsz arcp contract afn double %sub7, %5
+ %arrayidx10 = getelementptr inbounds double, ptr %x, i64 6
+ %6 = load double, ptr %arrayidx10, align 8
+ %sub11 = fsub reassoc nsz arcp contract afn double %sub9, %6
+ %arrayidx12 = getelementptr inbounds double, ptr %x, i64 7
+ %7 = load double, ptr %arrayidx12, align 8
+ %sub13 = fsub reassoc nsz arcp contract afn double %sub11, %7
+ ret double %sub13
+}
+
+define float @fun6_fsub(ptr %x) {
+; CHECK-LABEL: fun6_fsub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lde %f0, 0(%r2)
+; CHECK-NEXT: seb %f0, 4(%r2)
+; CHECK-NEXT: lde %f1, 12(%r2)
+; CHECK-NEXT: aeb %f1, 8(%r2)
+; CHECK-NEXT: sebr %f0, %f1
+; CHECK-NEXT: lde %f1, 20(%r2)
+; CHECK-NEXT: aeb %f1, 16(%r2)
+; CHECK-NEXT: aeb %f1, 24(%r2)
+; CHECK-NEXT: sebr %f0, %f1
+; CHECK-NEXT: seb %f0, 28(%r2)
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load float, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds float, ptr %x, i64 1
+ %1 = load float, ptr %arrayidx1, align 8
+ %sub = fsub reassoc nsz arcp contract afn float %0, %1
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 2
+ %2 = load float, ptr %arrayidx2, align 8
+ %sub3 = fsub reassoc nsz arcp contract afn float %sub, %2
+ %arrayidx4 = getelementptr inbounds float, ptr %x, i64 3
+ %3 = load float, ptr %arrayidx4, align 8
+ %sub5 = fsub reassoc nsz arcp contract afn float %sub3, %3
+ %arrayidx6 = getelementptr inbounds float, ptr %x, i64 4
+ %4 = load float, ptr %arrayidx6, align 8
+ %sub7 = fsub reassoc nsz arcp contract afn float %sub5, %4
+ %arrayidx8 = getelementptr inbounds float, ptr %x, i64 5
+ %5 = load float, ptr %arrayidx8, align 8
+ %sub9 = fsub reassoc nsz arcp contract afn float %sub7, %5
+ %arrayidx10 = getelementptr inbounds float, ptr %x, i64 6
+ %6 = load float, ptr %arrayidx10, align 8
+ %sub11 = fsub reassoc nsz arcp contract afn float %sub9, %6
+ %arrayidx12 = getelementptr inbounds float, ptr %x, i64 7
+ %7 = load float, ptr %arrayidx12, align 8
+ %sub13 = fsub reassoc nsz arcp contract afn float %sub11, %7
+ ret float %sub13
+}
+
+define fp128 @fun7_fsub(ptr %x) {
+; CHECK-LABEL: fun7_fsub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 16(%r3), 3
+; CHECK-NEXT: wfsxb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 32(%r3), 3
+; CHECK-NEXT: vl %v2, 48(%r3), 3
+; CHECK-NEXT: wfaxb %v1, %v1, %v2
+; CHECK-NEXT: wfsxb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r3), 3
+; CHECK-NEXT: vl %v2, 80(%r3), 3
+; CHECK-NEXT: wfaxb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r3), 3
+; CHECK-NEXT: wfaxb %v1, %v1, %v2
+; CHECK-NEXT: wfsxb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r3), 3
+; CHECK-NEXT: wfsxb %v0, %v0, %v1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load fp128, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds fp128, ptr %x, i64 1
+ %1 = load fp128, ptr %arrayidx1, align 8
+ %sub = fsub reassoc nsz arcp contract afn fp128 %0, %1
+ %arrayidx2 = getelementptr inbounds fp128, ptr %x, i64 2
+ %2 = load fp128, ptr %arrayidx2, align 8
+ %sub3 = fsub reassoc nsz arcp contract afn fp128 %sub, %2
+ %arrayidx4 = getelementptr inbounds fp128, ptr %x, i64 3
+ %3 = load fp128, ptr %arrayidx4, align 8
+ %sub5 = fsub reassoc nsz arcp contract afn fp128 %sub3, %3
+ %arrayidx6 = getelementptr inbounds fp128, ptr %x, i64 4
+ %4 = load fp128, ptr %arrayidx6, align 8
+ %sub7 = fsub reassoc nsz arcp contract afn fp128 %sub5, %4
+ %arrayidx8 = getelementptr inbounds fp128, ptr %x, i64 5
+ %5 = load fp128, ptr %arrayidx8, align 8
+ %sub9 = fsub reassoc nsz arcp contract afn fp128 %sub7, %5
+ %arrayidx10 = getelementptr inbounds fp128, ptr %x, i64 6
+ %6 = load fp128, ptr %arrayidx10, align 8
+ %sub11 = fsub reassoc nsz arcp contract afn fp128 %sub9, %6
+ %arrayidx12 = getelementptr inbounds fp128, ptr %x, i64 7
+ %7 = load fp128, ptr %arrayidx12, align 8
+ %sub13 = fsub reassoc nsz arcp contract afn fp128 %sub11, %7
+ ret fp128 %sub13
+}
+
+define <2 x double> @fun8_fsub(ptr %x) {
+; CHECK-LABEL: fun8_fsub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 16(%r2), 3
+; CHECK-NEXT: vfsdb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 32(%r2), 3
+; CHECK-NEXT: vl %v2, 48(%r2), 3
+; CHECK-NEXT: vfadb %v1, %v1, %v2
+; CHECK-NEXT: vfsdb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r2), 3
+; CHECK-NEXT: vl %v2, 80(%r2), 3
+; CHECK-NEXT: vfadb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r2), 3
+; CHECK-NEXT: vfadb %v1, %v1, %v2
+; CHECK-NEXT: vfsdb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r2), 3
+; CHECK-NEXT: vfsdb %v24, %v0, %v1
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load <2 x double>, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds <2 x double>, ptr %x, i64 1
+ %1 = load <2 x double>, ptr %arrayidx1, align 8
+ %sub = fsub reassoc nsz arcp contract afn <2 x double> %0, %1
+ %arrayidx2 = getelementptr inbounds <2 x double>, ptr %x, i64 2
+ %2 = load <2 x double>, ptr %arrayidx2, align 8
+ %sub3 = fsub reassoc nsz arcp contract afn <2 x double> %sub, %2
+ %arrayidx4 = getelementptr inbounds <2 x double>, ptr %x, i64 3
+ %3 = load <2 x double>, ptr %arrayidx4, align 8
+ %sub5 = fsub reassoc nsz arcp contract afn <2 x double> %sub3, %3
+ %arrayidx6 = getelementptr inbounds <2 x double>, ptr %x, i64 4
+ %4 = load <2 x double>, ptr %arrayidx6, align 8
+ %sub7 = fsub reassoc nsz arcp contract afn <2 x double> %sub5, %4
+ %arrayidx8 = getelementptr inbounds <2 x double>, ptr %x, i64 5
+ %5 = load <2 x double>, ptr %arrayidx8, align 8
+ %sub9 = fsub reassoc nsz arcp contract afn <2 x double> %sub7, %5
+ %arrayidx10 = getelementptr inbounds <2 x double>, ptr %x, i64 6
+ %6 = load <2 x double>, ptr %arrayidx10, align 8
+ %sub11 = fsub reassoc nsz arcp contract afn <2 x double> %sub9, %6
+ %arrayidx12 = getelementptr inbounds <2 x double>, ptr %x, i64 7
+ %7 = load <2 x double>, ptr %arrayidx12, align 8
+ %sub13 = fsub reassoc nsz arcp contract afn <2 x double> %sub11, %7
+ ret <2 x double> %sub13
+}
+
+define <4 x float> @fun9_fsub(ptr %x) {
+; CHECK-LABEL: fun9_fsub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 16(%r2), 3
+; CHECK-NEXT: vfssb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 32(%r2), 3
+; CHECK-NEXT: vl %v2, 48(%r2), 3
+; CHECK-NEXT: vfasb %v1, %v1, %v2
+; CHECK-NEXT: vfssb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r2), 3
+; CHECK-NEXT: vl %v2, 80(%r2), 3
+; CHECK-NEXT: vfasb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r2), 3
+; CHECK-NEXT: vfasb %v1, %v1, %v2
+; CHECK-NEXT: vfssb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r2), 3
+; CHECK-NEXT: vfssb %v24, %v0, %v1
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load <4 x float>, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds <4 x float>, ptr %x, i64 1
+ %1 = load <4 x float>, ptr %arrayidx1, align 8
+ %sub = fsub reassoc nsz arcp contract afn <4 x float> %0, %1
+ %arrayidx2 = getelementptr inbounds <4 x float>, ptr %x, i64 2
+ %2 = load <4 x float>, ptr %arrayidx2, align 8
+ %sub3 = fsub reassoc nsz arcp contract afn <4 x float> %sub, %2
+ %arrayidx4 = getelementptr inbounds <4 x float>, ptr %x, i64 3
+ %3 = load <4 x float>, ptr %arrayidx4, align 8
+ %sub5 = fsub reassoc nsz arcp contract afn <4 x float> %sub3, %3
+ %arrayidx6 = getelementptr inbounds <4 x float>, ptr %x, i64 4
+ %4 = load <4 x float>, ptr %arrayidx6, align 8
+ %sub7 = fsub reassoc nsz arcp contract afn <4 x float> %sub5, %4
+ %arrayidx8 = getelementptr inbounds <4 x float>, ptr %x, i64 5
+ %5 = load <4 x float>, ptr %arrayidx8, align 8
+ %sub9 = fsub reassoc nsz arcp contract afn <4 x float> %sub7, %5
+ %arrayidx10 = getelementptr inbounds <4 x float>, ptr %x, i64 6
+ %6 = load <4 x float>, ptr %arrayidx10, align 8
+ %sub11 = fsub reassoc nsz arcp contract afn <4 x float> %sub9, %6
+ %arrayidx12 = getelementptr inbounds <4 x float>, ptr %x, i64 7
+ %7 = load <4 x float>, ptr %arrayidx12, align 8
+ %sub13 = fsub reassoc nsz arcp contract afn <4 x float> %sub11, %7
+ ret <4 x float> %sub13
+}
+
+define double @fun10_fmul(ptr %x) {
+; CHECK-LABEL: fun10_fmul:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld %f0, 8(%r2)
+; CHECK-NEXT: mdb %f0, 0(%r2)
+; CHECK-NEXT: ld %f1, 24(%r2)
+; CHECK-NEXT: mdb %f1, 16(%r2)
+; CHECK-NEXT: mdbr %f0, %f1
+; CHECK-NEXT: ld %f1, 40(%r2)
+; CHECK-NEXT: mdb %f1, 32(%r2)
+; CHECK-NEXT: mdb %f1, 48(%r2)
+; CHECK-NEXT: mdbr %f0, %f1
+; CHECK-NEXT: mdb %f0, 56(%r2)
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load double, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1
+ %1 = load double, ptr %arrayidx1, align 8
+ %mul = fmul reassoc nsz arcp contract afn double %0, %1
+ %arrayidx2 = getelementptr inbounds double, ptr %x, i64 2
+ %2 = load double, ptr %arrayidx2, align 8
+ %mul3 = fmul reassoc nsz arcp contract afn double %mul, %2
+ %arrayidx4 = getelementptr inbounds double, ptr %x, i64 3
+ %3 = load double, ptr %arrayidx4, align 8
+ %mul5 = fmul reassoc nsz arcp contract afn double %mul3, %3
+ %arrayidx6 = getelementptr inbounds double, ptr %x, i64 4
+ %4 = load double, ptr %arrayidx6, align 8
+ %mul7 = fmul reassoc nsz arcp contract afn double %mul5, %4
+ %arrayidx8 = getelementptr inbounds double, ptr %x, i64 5
+ %5 = load double, ptr %arrayidx8, align 8
+ %mul9 = fmul reassoc nsz arcp contract afn double %mul7, %5
+ %arrayidx10 = getelementptr inbounds double, ptr %x, i64 6
+ %6 = load double, ptr %arrayidx10, align 8
+ %mul11 = fmul reassoc nsz arcp contract afn double %mul9, %6
+ %arrayidx12 = getelementptr inbounds double, ptr %x, i64 7
+ %7 = load double, ptr %arrayidx12, align 8
+ %mul13 = fmul reassoc nsz arcp contract afn double %mul11, %7
+ ret double %mul13
+}
+
+define float @fun11_fmul(ptr %x) {
+; CHECK-LABEL: fun11_fmul:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lde %f0, 4(%r2)
+; CHECK-NEXT: meeb %f0, 0(%r2)
+; CHECK-NEXT: lde %f1, 12(%r2)
+; CHECK-NEXT: meeb %f1, 8(%r2)
+; CHECK-NEXT: meebr %f0, %f1
+; CHECK-NEXT: lde %f1, 20(%r2)
+; CHECK-NEXT: meeb %f1, 16(%r2)
+; CHECK-NEXT: meeb %f1, 24(%r2)
+; CHECK-NEXT: meebr %f0, %f1
+; CHECK-NEXT: meeb %f0, 28(%r2)
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load float, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds float, ptr %x, i64 1
+ %1 = load float, ptr %arrayidx1, align 8
+ %mul = fmul reassoc nsz arcp contract afn float %0, %1
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 2
+ %2 = load float, ptr %arrayidx2, align 8
+ %mul3 = fmul reassoc nsz arcp contract afn float %mul, %2
+ %arrayidx4 = getelementptr inbounds float, ptr %x, i64 3
+ %3 = load float, ptr %arrayidx4, align 8
+ %mul5 = fmul reassoc nsz arcp contract afn float %mul3, %3
+ %arrayidx6 = getelementptr inbounds float, ptr %x, i64 4
+ %4 = load float, ptr %arrayidx6, align 8
+ %mul7 = fmul reassoc nsz arcp contract afn float %mul5, %4
+ %arrayidx8 = getelementptr inbounds float, ptr %x, i64 5
+ %5 = load float, ptr %arrayidx8, align 8
+ %mul9 = fmul reassoc nsz arcp contract afn float %mul7, %5
+ %arrayidx10 = getelementptr inbounds float, ptr %x, i64 6
+ %6 = load float, ptr %arrayidx10, align 8
+ %mul11 = fmul reassoc nsz arcp contract afn float %mul9, %6
+ %arrayidx12 = getelementptr inbounds float, ptr %x, i64 7
+ %7 = load float, ptr %arrayidx12, align 8
+ %mul13 = fmul reassoc nsz arcp contract afn float %mul11, %7
+ ret float %mul13
+}
+
+define fp128 @fun12_fmul(ptr %x) {
+; CHECK-LABEL: fun12_fmul:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 16(%r3), 3
+; CHECK-NEXT: wfmxb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 32(%r3), 3
+; CHECK-NEXT: vl %v2, 48(%r3), 3
+; CHECK-NEXT: wfmxb %v1, %v1, %v2
+; CHECK-NEXT: wfmxb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r3), 3
+; CHECK-NEXT: vl %v2, 80(%r3), 3
+; CHECK-NEXT: wfmxb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r3), 3
+; CHECK-NEXT: wfmxb %v1, %v1, %v2
+; CHECK-NEXT: wfmxb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r3), 3
+; CHECK-NEXT: wfmxb %v0, %v0, %v1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load fp128, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds fp128, ptr %x, i64 1
+ %1 = load fp128, ptr %arrayidx1, align 8
+ %mul = fmul reassoc nsz arcp contract afn fp128 %0, %1
+ %arrayidx2 = getelementptr inbounds fp128, ptr %x, i64 2
+ %2 = load fp128, ptr %arrayidx2, align 8
+ %mul3 = fmul reassoc nsz arcp contract afn fp128 %mul, %2
+ %arrayidx4 = getelementptr inbounds fp128, ptr %x, i64 3
+ %3 = load fp128, ptr %arrayidx4, align 8
+ %mul5 = fmul reassoc nsz arcp contract afn fp128 %mul3, %3
+ %arrayidx6 = getelementptr inbounds fp128, ptr %x, i64 4
+ %4 = load fp128, ptr %arrayidx6, align 8
+ %mul7 = fmul reassoc nsz arcp contract afn fp128 %mul5, %4
+ %arrayidx8 = getelementptr inbounds fp128, ptr %x, i64 5
+ %5 = load fp128, ptr %arrayidx8, align 8
+ %mul9 = fmul reassoc nsz arcp contract afn fp128 %mul7, %5
+ %arrayidx10 = getelementptr inbounds fp128, ptr %x, i64 6
+ %6 = load fp128, ptr %arrayidx10, align 8
+ %mul11 = fmul reassoc nsz arcp contract afn fp128 %mul9, %6
+ %arrayidx12 = getelementptr inbounds fp128, ptr %x, i64 7
+ %7 = load fp128, ptr %arrayidx12, align 8
+ %mul13 = fmul reassoc nsz arcp contract afn fp128 %mul11, %7
+ ret fp128 %mul13
+}
+
+define <2 x double> @fun13_fmul(ptr %x) {
+; CHECK-LABEL: fun13_fmul:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 16(%r2), 3
+; CHECK-NEXT: vfmdb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 32(%r2), 3
+; CHECK-NEXT: vl %v2, 48(%r2), 3
+; CHECK-NEXT: vfmdb %v1, %v1, %v2
+; CHECK-NEXT: vfmdb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r2), 3
+; CHECK-NEXT: vl %v2, 80(%r2), 3
+; CHECK-NEXT: vfmdb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r2), 3
+; CHECK-NEXT: vfmdb %v1, %v1, %v2
+; CHECK-NEXT: vfmdb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r2), 3
+; CHECK-NEXT: vfmdb %v24, %v0, %v1
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load <2 x double>, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds <2 x double>, ptr %x, i64 1
+ %1 = load <2 x double>, ptr %arrayidx1, align 8
+ %mul = fmul reassoc nsz arcp contract afn <2 x double> %0, %1
+ %arrayidx2 = getelementptr inbounds <2 x double>, ptr %x, i64 2
+ %2 = load <2 x double>, ptr %arrayidx2, align 8
+ %mul3 = fmul reassoc nsz arcp contract afn <2 x double> %mul, %2
+ %arrayidx4 = getelementptr inbounds <2 x double>, ptr %x, i64 3
+ %3 = load <2 x double>, ptr %arrayidx4, align 8
+ %mul5 = fmul reassoc nsz arcp contract afn <2 x double> %mul3, %3
+ %arrayidx6 = getelementptr inbounds <2 x double>, ptr %x, i64 4
+ %4 = load <2 x double>, ptr %arrayidx6, align 8
+ %mul7 = fmul reassoc nsz arcp contract afn <2 x double> %mul5, %4
+ %arrayidx8 = getelementptr inbounds <2 x double>, ptr %x, i64 5
+ %5 = load <2 x double>, ptr %arrayidx8, align 8
+ %mul9 = fmul reassoc nsz arcp contract afn <2 x double> %mul7, %5
+ %arrayidx10 = getelementptr inbounds <2 x double>, ptr %x, i64 6
+ %6 = load <2 x double>, ptr %arrayidx10, align 8
+ %mul11 = fmul reassoc nsz arcp contract afn <2 x double> %mul9, %6
+ %arrayidx12 = getelementptr inbounds <2 x double>, ptr %x, i64 7
+ %7 = load <2 x double>, ptr %arrayidx12, align 8
+ %mul13 = fmul reassoc nsz arcp contract afn <2 x double> %mul11, %7
+ ret <2 x double> %mul13
+}
+
+define <4 x float> @fun14_fmul(ptr %x) {
+; CHECK-LABEL: fun14_fmul:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vl %v1, 16(%r2), 3
+; CHECK-NEXT: vfmsb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 32(%r2), 3
+; CHECK-NEXT: vl %v2, 48(%r2), 3
+; CHECK-NEXT: vfmsb %v1, %v1, %v2
+; CHECK-NEXT: vfmsb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 64(%r2), 3
+; CHECK-NEXT: vl %v2, 80(%r2), 3
+; CHECK-NEXT: vfmsb %v1, %v1, %v2
+; CHECK-NEXT: vl %v2, 96(%r2), 3
+; CHECK-NEXT: vfmsb %v1, %v1, %v2
+; CHECK-NEXT: vfmsb %v0, %v0, %v1
+; CHECK-NEXT: vl %v1, 112(%r2), 3
+; CHECK-NEXT: vfmsb %v24, %v0, %v1
+; CHECK-NEXT: br %r14
+entry:
+ %0 = load <4 x float>, ptr %x, align 8
+ %arrayidx1 = getelementptr inbounds <4 x float>, ptr %x, i64 1
+ %1 = load <4 x float>, ptr %arrayidx1, align 8
+ %mul = fmul reassoc nsz arcp contract afn <4 x float> %0, %1
+ %arrayidx2 = getelementptr inbounds <4 x float>, ptr %x, i64 2
+ %2 = load <4 x float>, ptr %arrayidx2, align 8
+ %mul3 = fmul reassoc nsz arcp contract afn <4 x float> %mul, %2
+ %arrayidx4 = getelementptr inbounds <4 x float>, ptr %x, i64 3
+ %3 = load <4 x float>, ptr %arrayidx4, align 8
+ %mul5 = fmul reassoc nsz arcp contract afn <4 x float> %mul3, %3
+ %arrayidx6 = getelementptr inbounds <4 x float>, ptr %x, i64 4
+ %4 = load <4 x float>, ptr %arrayidx6, align 8
+ %mul7 = fmul reassoc nsz arcp contract afn <4 x float> %mul5, %4
+ %arrayidx8 = getelementptr inbounds <4 x float>, ptr %x, i64 5
+ %5 = load <4 x float>, ptr %arrayidx8, align 8
+ %mul9 = fmul reassoc nsz arcp contract afn <4 x float> %mul7, %5
+ %arrayidx10 = getelementptr inbounds <4 x float>, ptr %x, i64 6
+ %6 = load <4 x float>, ptr %arrayidx10, align 8
+ %mul11 = fmul reassoc nsz arcp contract afn <4 x float> %mul9, %6
+ %arrayidx12 = getelementptr inbounds <4 x float>, ptr %x, i64 7
+ %7 = load <4 x float>, ptr %arrayidx12, align 8
+ %mul13 = fmul reassoc nsz arcp contract afn <4 x float> %mul11, %7
+ ret <4 x float> %mul13
+}
diff --git a/llvm/test/CodeGen/SystemZ/stackmap.ll b/llvm/test/CodeGen/SystemZ/stackmap.ll
index 88c7336037c9c4..6156b7f2fc5a18 100644
--- a/llvm/test/CodeGen/SystemZ/stackmap.ll
+++ b/llvm/test/CodeGen/SystemZ/stackmap.ll
@@ -38,10 +38,10 @@
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spilledValue
-; CHECK-NEXT: .quad 240
+; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spilledStackMapValue
-; CHECK-NEXT: .quad 200
+; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spillSubReg
; CHECK-NEXT: .quad 168
More information about the llvm-commits
mailing list