[llvm] e8425b2 - [PowerPC] add store (load float*) pattern to isProfitableToHoist

Tue Jul 21 17:55:27 PDT 2020

Author: Chen Zheng
Date: 2020-07-21T20:55:13-04:00
New Revision: e8425b27fec355192c0ee82c4dd82874d7b10591

URL: https://github.com/llvm/llvm-project/commit/e8425b27fec355192c0ee82c4dd82874d7b10591
DIFF: https://github.com/llvm/llvm-project/commit/e8425b27fec355192c0ee82c4dd82874d7b10591.diff

LOG: [PowerPC] add store (load float*) pattern to isProfitableToHoist

A store (load float*) pattern can be optimized to store (load i32*) by the InstCombine pass.

Add the store (load float*) pattern to isProfitableToHoist as not profitable to
hoist, so that hoisting the load does not break this InstCombine optimization.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D82341

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 126364ab1943..f8d7ab87f35c 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16381,31 +16381,56 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
   }
 }
 
-// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
-// FIXME: add more patterns which are profitable to hoist.
+// FIXME: add more patterns which are not profitable to hoist.
 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
-  if (I->getOpcode() != Instruction::FMul)
-    return true;
-
   if (!I->hasOneUse())
     return true;
 
   Instruction *User = I->user_back();
   assert(User && "A single use instruction with no uses.");
 
-  if (User->getOpcode() != Instruction::FSub &&
-      User->getOpcode() != Instruction::FAdd)
-    return true;
+  switch (I->getOpcode()) {
+  case Instruction::FMul: {
+    // Don't break FMA, PowerPC prefers FMA.
+    if (User->getOpcode() != Instruction::FSub &&
+        User->getOpcode() != Instruction::FAdd)
+      return true;
 
-  const TargetOptions &Options = getTargetMachine().Options;
-  const Function *F = I->getFunction();
-  const DataLayout &DL = F->getParent()->getDataLayout();
-  Type *Ty = User->getOperand(0)->getType();
-
-  return !(
-      isFMAFasterThanFMulAndFAdd(*F, Ty) &&
-      isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
-      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+    const TargetOptions &Options = getTargetMachine().Options;
+    const Function *F = I->getFunction();
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    Type *Ty = User->getOperand(0)->getType();
+
+    return !(
+        isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+        isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+        (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+  }
+  case Instruction::Load: {
+    // Don't break "store (load float*)" pattern, this pattern will be combined
+    // to "store (load int32)" in later InstCombine pass. See function
+    // combineLoadToOperationType. On PowerPC, loading a float point takes more
+    // cycles than loading a 32 bit integer.
+    LoadInst *LI = cast<LoadInst>(I);
+    // For the loads that combineLoadToOperationType does nothing, like
+    // ordered load, it should be profitable to hoist them.
+    // For swifterror load, it can only be used for pointer to pointer type, so
+    // later type check should get rid of this case.
+    if (!LI->isUnordered())
+      return true;
+
+    if (User->getOpcode() != Instruction::Store)
+      return true;
+
+    if (I->getType()->getTypeID() != Type::FloatTyID)
+      return true;
+
+    return false;
+  }
+  default:
+    return true;
+  }
+  return true;
 }
 
 const MCPhysReg *

diff  --git a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll
index b1e1d85faf29..d85f532aa5c4 100644
--- a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll
+++ b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll
@@ -3,7 +3,7 @@
 define float @foo(float* %src, float* %dest, i32 signext %count, i32 signext %cond) {
 ; CHECK-LABEL: @foo(
 ; CHECK-LABEL: entry:
-; CHECK:  %0 = load float, float* %arrayidx, align 4
+; CHECK-NOT:  load float
 entry:
   %cmp = icmp sgt i32 %cond, 10
   %idxprom = sext i32 %count to i64
@@ -11,14 +11,15 @@ entry:
   br i1 %cmp, label %if.then, label %if.else
 
 ; CHECK-LABEL: if.then:
-; CHECK-NOT:   load float
+; CHECK:  %0 = load float, float* %arrayidx, align 4
 if.then:                                          ; preds = %entry
   %0 = load float, float* %arrayidx, align 4
   %res = fmul float %0, 3.000000e+00
   br label %if.end
 
 ; CHECK-LABEL: if.else:
-; CHECK-NOT:   load float
+; CHECK:   %1 = load float, float* %arrayidx, align 4
+; CHECK:   store float %1, float* %arrayidx4, align 4
 if.else:                                          ; preds = %entry
   %1 = load float, float* %arrayidx, align 4
   %idxprom3 = sext i32 %count to i64