[llvm] r319980 - [AArch64] Add patterns to replace fsub fmul with fma fneg.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 6 14:48:37 PST 2017
Author: fhahn
Date: Wed Dec 6 14:48:36 2017
New Revision: 319980
URL: http://llvm.org/viewvc/llvm-project?rev=319980&view=rev
Log:
[AArch64] Add patterns to replace fsub fmul with fma fneg.
Summary:
This patch adds MachineCombiner patterns for transforming
(fsub (fmul x y) z) into (fma x y (fneg z)). This has a lower
latency on microarchitectures where fneg is cheap.
Patch based on work by George Steed.
Reviewers: rengolin, joelkevinjones, joel_k_jones, evandro, efriedma
Reviewed By: evandro
Subscribers: aemerson, javed.absar, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D40306
Added:
llvm/trunk/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
Modified:
llvm/trunk/include/llvm/CodeGen/MachineCombinerPattern.h
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
Modified: llvm/trunk/include/llvm/CodeGen/MachineCombinerPattern.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineCombinerPattern.h?rev=319980&r1=319979&r2=319980&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineCombinerPattern.h (original)
+++ llvm/trunk/include/llvm/CodeGen/MachineCombinerPattern.h Wed Dec 6 14:48:36 2017
@@ -68,12 +68,18 @@ enum class MachineCombinerPattern {
FMLAv4i32_indexed_OP2,
FMLSv1i32_indexed_OP2,
FMLSv1i64_indexed_OP2,
- FMLSv2i32_indexed_OP2,
- FMLSv2i64_indexed_OP2,
+ FMLSv2f32_OP1,
FMLSv2f32_OP2,
+ FMLSv2f64_OP1,
FMLSv2f64_OP2,
- FMLSv4i32_indexed_OP2,
- FMLSv4f32_OP2
+ FMLSv2i32_indexed_OP1,
+ FMLSv2i32_indexed_OP2,
+ FMLSv2i64_indexed_OP1,
+ FMLSv2i64_indexed_OP2,
+ FMLSv4f32_OP1,
+ FMLSv4f32_OP2,
+ FMLSv4i32_indexed_OP1,
+ FMLSv4i32_indexed_OP2
};
} // end namespace llvm
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=319980&r1=319979&r2=319980&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Wed Dec 6 14:48:36 2017
@@ -3672,6 +3672,15 @@ static bool getFMAPatterns(MachineInstr
}
break;
case AArch64::FSUBv2f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv2i32_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
@@ -3683,6 +3692,15 @@ static bool getFMAPatterns(MachineInstr
}
break;
case AArch64::FSUBv2f64:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2f64)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv2i64_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
@@ -3694,6 +3712,15 @@ static bool getFMAPatterns(MachineInstr
}
break;
case AArch64::FSUBv4f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv4i32_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
@@ -3790,12 +3817,15 @@ enum class FMAInstKind { Default, Indexe
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
+/// \param ReplacedAddend is the result register from the instruction
+/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
unsigned MaddOpc, const TargetRegisterClass *RC,
- FMAInstKind kind = FMAInstKind::Default) {
+ FMAInstKind kind = FMAInstKind::Default,
+ const unsigned *ReplacedAddend = nullptr) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
@@ -3805,8 +3835,17 @@ genFusedMultiply(MachineFunction &MF, Ma
bool Src0IsKill = MUL->getOperand(1).isKill();
unsigned SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
- unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
- bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
+
+ unsigned SrcReg2;
+ bool Src2IsKill;
+ if (ReplacedAddend) {
+ // If we just generated a new addend, we must be its only use.
+ SrcReg2 = *ReplacedAddend;
+ Src2IsKill = true;
+ } else {
+ SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
+ Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
+ }
if (TargetRegisterInfo::isVirtualRegister(ResultReg))
MRI.constrainRegClass(ResultReg, RC);
@@ -4326,6 +4365,66 @@ void AArch64InstrInfo::genAlternativeCod
FMAInstKind::Accumulator);
}
break;
+ case MachineCombinerPattern::FMLSv2f32_OP1:
+ case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
+ RC = &AArch64::FPR64RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
+ Opc = AArch64::FMLAv2i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv2f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
+ case MachineCombinerPattern::FMLSv4f32_OP1:
+ case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
+ RC = &AArch64::FPR128RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
+ Opc = AArch64::FMLAv4i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv4f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
+ case MachineCombinerPattern::FMLSv2f64_OP1:
+ case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
+ RC = &AArch64::FPR128RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
+ Opc = AArch64::FMLAv2i64_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv2f64;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);
Added: llvm/trunk/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir?rev=319980&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir Wed Dec 6 14:48:36 2017
@@ -0,0 +1,82 @@
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefix=UNPROFITABLE %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
+#
+name: f1_2s
+registers:
+ - { id: 0, class: fpr64 }
+ - { id: 1, class: fpr64 }
+ - { id: 2, class: fpr64 }
+ - { id: 3, class: fpr64 }
+ - { id: 4, class: fpr64 }
+body: |
+ bb.0.entry:
+ %2:fpr64 = COPY %d2
+ %1:fpr64 = COPY %d1
+ %0:fpr64 = COPY %d0
+ %3:fpr64 = FMULv2f32 %0, %1
+ %4:fpr64 = FSUBv2f32 killed %3, %2
+ %d0 = COPY %4
+ RET_ReallyLR implicit %d0
+
+...
+# UNPROFITABLE-LABEL: name: f1_2s
+# UNPROFITABLE: %3:fpr64 = FMULv2f32 %0, %1
+# UNPROFITABLE-NEXT: FSUBv2f32 killed %3, %2
+#
+# PROFITABLE-LABEL: name: f1_2s
+# PROFITABLE: %5:fpr64 = FNEGv2f32 %2
+# PROFITABLE-NEXT: FMLAv2f32 killed %5, %0, %1
+---
+name: f1_4s
+registers:
+ - { id: 0, class: fpr128 }
+ - { id: 1, class: fpr128 }
+ - { id: 2, class: fpr128 }
+ - { id: 3, class: fpr128 }
+ - { id: 4, class: fpr128 }
+body: |
+ bb.0.entry:
+ %2:fpr128 = COPY %q2
+ %1:fpr128 = COPY %q1
+ %0:fpr128 = COPY %q0
+ %3:fpr128 = FMULv4f32 %0, %1
+ %4:fpr128 = FSUBv4f32 killed %3, %2
+ %q0 = COPY %4
+ RET_ReallyLR implicit %q0
+
+...
+# UNPROFITABLE-LABEL: name: f1_4s
+# UNPROFITABLE: %3:fpr128 = FMULv4f32 %0, %1
+# UNPROFITABLE-NEXT: FSUBv4f32 killed %3, %2
+#
+# PROFITABLE-LABEL: name: f1_4s
+# PROFITABLE: %5:fpr128 = FNEGv4f32 %2
+# PROFITABLE-NEXT: FMLAv4f32 killed %5, %0, %1
+---
+name: f1_2d
+registers:
+ - { id: 0, class: fpr128 }
+ - { id: 1, class: fpr128 }
+ - { id: 2, class: fpr128 }
+ - { id: 3, class: fpr128 }
+ - { id: 4, class: fpr128 }
+body: |
+ bb.0.entry:
+ %2:fpr128 = COPY %q2
+ %1:fpr128 = COPY %q1
+ %0:fpr128 = COPY %q0
+ %3:fpr128 = FMULv2f64 %0, %1
+ %4:fpr128 = FSUBv2f64 killed %3, %2
+ %q0 = COPY %4
+ RET_ReallyLR implicit %q0
+
+...
+# UNPROFITABLE-LABEL: name: f1_2d
+# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1
+# UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2
+#
+# PROFITABLE-LABEL: name: f1_2d
+# PROFITABLE: %5:fpr128 = FNEGv2f64 %2
+# PROFITABLE-NEXT: FMLAv2f64 killed %5, %0, %1
More information about the llvm-commits
mailing list