[llvm] 3f85134 - [PowerPC] implement target hook isProfitableToHoist

Wed Mar 18 21:17:40 PDT 2020

Author: Chen Zheng
Date: 2020-03-19T00:17:25-04:00
New Revision: 3f85134d710c1cdfd6bd3175756aee6ac267a240

URL: https://github.com/llvm/llvm-project/commit/3f85134d710c1cdfd6bd3175756aee6ac267a240
DIFF: https://github.com/llvm/llvm-project/commit/3f85134d710c1cdfd6bd3175756aee6ac267a240.diff

LOG: [PowerPC] implement target hook isProfitableToHoist

On PowerPC, FMA is faster than fadd + fmul for some types (see
PPCTargetLowering::isFMAFasterThanFMulAndFAdd). We should implement the target
hook isProfitableToHoist to prevent the SimplifyCFG pass from breaking the FMA
pattern by hoisting the fmul to the predecessor block.

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D76207

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index efd7e5b602a9..e9470907c289 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15385,6 +15385,33 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
   }
 }
 
+// Returns false when hoisting the fmul \p I would keep it from being fused with
+// its only user (an fadd/fsub) into an FMA. Mirrors the AArch64 implementation.
+// FIXME: add more patterns which are profitable to hoist.
+bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
+  if (I->getOpcode() != Instruction::FMul || !I->hasOneUse())
+    return true;
+
+  Instruction *User = I->user_back();
+  assert(User && "A single use instruction with no uses.");
+
+  // Only an fadd or fsub user can be combined with the fmul into an FMA.
+  if (User->getOpcode() != Instruction::FSub &&
+      User->getOpcode() != Instruction::FAdd)
+    return true;
+
+  const TargetOptions &Options = getTargetMachine().Options;
+  const Function *F = I->getFunction();
+  const DataLayout &DL = F->getParent()->getDataLayout();
+  Type *Ty = User->getOperand(0)->getType();
+
+  // Unprofitable only when FMA is faster, legal, and fusion is permitted.
+  return !(
+      isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+      isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+}
+
 const MCPhysReg *
 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
   // LR is a callee-save register, but we must treat it as clobbered by any call

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 89adc607303a..70bf4fbfce1d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -909,6 +909,12 @@ namespace llvm {
 
     bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
 
+    /// isProfitableToHoist - Check if it is profitable to hoist instruction
+    /// \p I to its dominator block.
+    /// For example, it is not profitable if \p I and its only user can form an
+    /// FMA instruction, because PowerPC prefers FMADD.
+    bool isProfitableToHoist(Instruction *I) const override;
+
     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
     // Should we expand the build vector with shuffles?

diff  --git a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll
index e72413e8b308..93fe8a201907 100644
--- a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll
+++ b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll
@@ -11,14 +11,15 @@ define double @_Z3fooRdS_S_S_(double* dereferenceable(8) %x, double* dereference
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load double, double* [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[MUL:%.*]] = fadd fast double 1.000000e+00, [[TMP3]]
 ; CHECK-NEXT:    store double [[MUL]], double* [[Y]], align 8
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[SUB1:%.*]] = fsub fast double [[TMP3]], [[TMP0]]
+; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[SUB1:%.*]] = fsub fast double [[MUL1]], [[TMP0]]
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr double, double* [[Y]], i32 1
 ; CHECK-NEXT:    store double [[SUB1]], double* [[GEP1]], align 8
 ; CHECK-NEXT:    br label [[IF_END]]