[llvm] 467b1f1 - [SimplifyCFG] Allow hoisting terminators only with HoistCommonInsts=false.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 13 02:33:53 PDT 2021


Author: Florian Hahn
Date: 2021-04-13T10:33:35+01:00
New Revision: 467b1f1cd2f2774714ce59919702c3963914b6a8

URL: https://github.com/llvm/llvm-project/commit/467b1f1cd2f2774714ce59919702c3963914b6a8
DIFF: https://github.com/llvm/llvm-project/commit/467b1f1cd2f2774714ce59919702c3963914b6a8.diff

LOG: [SimplifyCFG] Allow hoisting terminators only with HoistCommonInsts=false.

As a side-effect of the change to default HoistCommonInsts to false
early in the pipeline, we fail to convert conditional branch & phis to
selects early on, which prevents vectorization for loops that contain
conditional branches that effectively are selects (or if the loop gets
vectorized, it will get vectorized very inefficiently).

This patch updates SimplifyCFG to perform hoisting if the only
instruction in both BBs is an equal branch. In this case, the only
additional instructions are selects for phis, which should be cheap.

Even though we perform hoisting, the benefits of this kind of hoisting
should by far outweigh the negatives.

For example, the loop in the code below will not get vectorized on
AArch64 with the current default, but will with the patch. This is a
fundamental pattern we should definitely vectorize. Besides that, I
think the select variants should be easier to use for reasoning across
other passes as well.

https://clang.godbolt.org/z/sbjd8Wshx

```
double clamp(double v) {
  if (v < 0.0)
    return 0.0;
  if (v > 6.0)
    return 6.0;
  return v;
}

void loop(double* X, double *Y) {
  for (unsigned i = 0; i < 20000; i++) {
    X[i] = clamp(Y[i]);
  }
}
```

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D100329

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
    llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7f4736c4b550..9fdcb76aafda 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -253,7 +253,8 @@ class SimplifyCFGOpt {
   bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                              IRBuilder<> &Builder);
 
-  bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI);
+  bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI,
+                             bool EqTermsOnly);
   bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
                               const TargetTransformInfo &TTI);
   bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
@@ -1388,9 +1389,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
 
 /// Given a conditional branch that goes to BB1 and BB2, hoist any common code
 /// in the two blocks up into the branch block. The caller of this function
-/// guarantees that BI's block dominates BB1 and BB2.
+/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
+/// only perform hoisting in case both blocks only contain a terminator. In that
+/// case, only the original BI will be replaced and selects for PHIs are added.
 bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
-                                           const TargetTransformInfo &TTI) {
+                                           const TargetTransformInfo &TTI,
+                                           bool EqTermsOnly) {
   // This does very trivial matching, with limited scanning, to find identical
   // instructions in the two blocks.  In particular, we don't want to get into
   // O(M*N) situations here where M and N are the sizes of BB1 and BB2.  As
@@ -1427,6 +1431,16 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
       ++NumHoistCommonCode;
   });
 
+  // Check if only hoisting terminators is allowed. This does not add new
+  // instructions to the hoist location.
+  if (EqTermsOnly) {
+    if (!I1->isIdenticalToWhenDefined(I2))
+      return false;
+    if (!I1->isTerminator())
+      return false;
+    goto HoistTerminator;
+  }
+
   do {
     // If we are hoisting the terminator instruction, don't move one (making a
     // broken BB), instead clone it, and remove BI.
@@ -6443,9 +6457,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // can hoist it up to the branching block.
   if (BI->getSuccessor(0)->getSinglePredecessor()) {
     if (BI->getSuccessor(1)->getSinglePredecessor()) {
-      if (HoistCommon && Options.HoistCommonInsts)
-        if (HoistThenElseCodeToIf(BI, TTI))
-          return requestResimplify();
+      if (HoistCommon &&
+          HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts))
+        return requestResimplify();
     } else {
       // If Successor #1 has multiple preds, we may be able to conditionally
       // execute Successor #0 if it branches to Successor #1.

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
index 23daafda70e7..72f87526c1d4 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
@@ -41,28 +41,54 @@ return:                                           ; preds = %if.end3, %if.then2,
 define void @loop(double* %X, double* %Y) {
 ; CHECK-LABEL: @loop(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 20000
+; CHECK-NEXT:    [[SCEVGEP9:%.*]] = getelementptr double, double* [[Y:%.*]], i64 20000
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP9]], [[X]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[INDEX]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD11]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], <double 6.000000e+00, double 6.000000e+00>
+; CHECK-NEXT:    [[TMP8:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD11]], <double 6.000000e+00, double 6.000000e+00>
+; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD11]]
+; CHECK-NEXT:    [[TMP11:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> zeroinitializer, <2 x double> [[TMP10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[TMP13]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 2
+; CHECK-NEXT:    [[TMP16:%.*]] = bitcast double* [[TMP15]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP12]], <2 x double>* [[TMP16]], align 8, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000
+; CHECK-NEXT:    br i1 [[TMP17]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[CLAMP_EXIT:%.*]] ]
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_05]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt double [[TMP0]], 0.000000e+00
-; CHECK-NEXT:    br i1 [[CMP_I]], label [[CLAMP_EXIT]], label [[IF_END_I:%.*]]
-; CHECK:       if.end.i:
-; CHECK-NEXT:    [[CMP1_I:%.*]] = fcmp ogt double [[TMP0]], 6.000000e+00
-; CHECK-NEXT:    br i1 [[CMP1_I]], label [[CLAMP_EXIT]], label [[IF_END3_I:%.*]]
-; CHECK:       if.end3.i:
-; CHECK-NEXT:    br label [[CLAMP_EXIT]]
-; CHECK:       clamp.exit:
-; CHECK-NEXT:    [[RETVAL_0_I:%.*]] = phi double [ [[TMP0]], [[IF_END3_I]] ], [ 0.000000e+00, [[FOR_BODY]] ], [ 6.000000e+00, [[IF_END_I]] ]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt double [[TMP18]], 0.000000e+00
+; CHECK-NEXT:    [[CMP1_I:%.*]] = fcmp ogt double [[TMP18]], 6.000000e+00
+; CHECK-NEXT:    [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP18]]
+; CHECK-NEXT:    [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    store double [[RETVAL_0_I]], double* [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_05]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_05]], 19999
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP7:![0-9]+]]
 ;
 entry:
   %X.addr = alloca double*, align 8

diff  --git a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll
index c8bdba512ee2..efbf481e61a4 100644
--- a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll
+++ b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll
@@ -110,8 +110,8 @@ return:
 }
 
 ; A example where only the branch instructions from %if.then2 and %if.end3 need
-; to be hoisted, which effectively replaces the original branch in %if.end. As
-; this does not add any new instructions to the hoist location, it should always be profitable.
+; to be hoisted, which effectively replaces the original branch in %if.end and
+; only requires selects for PHIs in the successor.
 define float @clamp_float_value(float %value, float %minimum_value, float %maximum_value) {
 ; HOIST-LABEL: @clamp_float_value(
 ; HOIST-NEXT:  entry:
@@ -124,14 +124,9 @@ define float @clamp_float_value(float %value, float %minimum_value, float %maxim
 ; NOHOIST-LABEL: @clamp_float_value(
 ; NOHOIST-NEXT:  entry:
 ; NOHOIST-NEXT:    [[CMP:%.*]] = fcmp ogt float [[VALUE:%.*]], [[MAXIMUM_VALUE:%.*]]
-; NOHOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]]
-; NOHOIST:       if.end:
 ; NOHOIST-NEXT:    [[CMP1:%.*]] = fcmp olt float [[VALUE]], [[MINIMUM_VALUE:%.*]]
-; NOHOIST-NEXT:    br i1 [[CMP1]], label [[RETURN]], label [[IF_END3:%.*]]
-; NOHOIST:       if.end3:
-; NOHOIST-NEXT:    br label [[RETURN]]
-; NOHOIST:       return:
-; NOHOIST-NEXT:    [[RETVAL_0:%.*]] = phi float [ [[VALUE]], [[IF_END3]] ], [ [[MAXIMUM_VALUE]], [[ENTRY:%.*]] ], [ [[MINIMUM_VALUE]], [[IF_END]] ]
+; NOHOIST-NEXT:    [[MINIMUM_VALUE_VALUE:%.*]] = select i1 [[CMP1]], float [[MINIMUM_VALUE]], float [[VALUE]]
+; NOHOIST-NEXT:    [[RETVAL_0:%.*]] = select i1 [[CMP]], float [[MAXIMUM_VALUE]], float [[MINIMUM_VALUE_VALUE]]
 ; NOHOIST-NEXT:    ret float [[RETVAL_0]]
 ;
 entry:


        


More information about the llvm-commits mailing list