[llvm] 65f78e7 - [SimplifyCFG] Consider cost of combining predicates.

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 7 02:08:06 PDT 2020


Author: Sam Parker
Date: 2020-09-07T10:04:50+01:00
New Revision: 65f78e73ad574bb73bb625c787850acd261ba53a

URL: https://github.com/llvm/llvm-project/commit/65f78e73ad574bb73bb625c787850acd261ba53a
DIFF: https://github.com/llvm/llvm-project/commit/65f78e73ad574bb73bb625c787850acd261ba53a.diff

LOG: [SimplifyCFG] Consider cost of combining predicates.

Modify FoldBranchToCommonDest to consider the cost of inserting
instructions when attempting to combine predicates to fold blocks.
The threshold can be controlled via a new option:
-simplifycfg-branch-fold-threshold which defaults to '2' to allow
the insertion of a not and another logical operator.

Differential Revision: https://reviews.llvm.org/D86526

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/Local.h
    llvm/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 5ab2dd496282..fb6f0269a0ac 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -199,6 +199,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
 /// branches to us and one of our successors, fold the setcc into the
 /// predecessor and use logical operations to pick the right destination.
 bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr,
+                            const TargetTransformInfo *TTI = nullptr,
                             unsigned BonusInstThreshold = 1);
 
 /// This function takes a virtual register computed by an Instruction and

diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index ae2471969160..124a7c423e72 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -143,6 +143,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
                   cl::desc("Max size of a block which is still considered "
                            "small enough to thread through"));
 
+// Two is chosen to allow one negation and a logical combine.
+static cl::opt<unsigned>
+    BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
+                        cl::init(2),
+                        cl::desc("Maximum cost of combining conditions when "
+                                 "folding branches"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps,
           "Number of switch instructions turned into linear mapping");
@@ -2684,12 +2691,16 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
 /// and one of our successors, fold the block into the predecessor and use
 /// logical operations to pick the right destination.
 bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
+                                  const TargetTransformInfo *TTI,
                                   unsigned BonusInstThreshold) {
   BasicBlock *BB = BI->getParent();
 
   const unsigned PredCount = pred_size(BB);
 
   bool Changed = false;
+  TargetTransformInfo::TargetCostKind CostKind =
+    BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
+                                  : TargetTransformInfo::TCK_SizeAndLatency;
 
   Instruction *Cond = nullptr;
   if (BI->isConditional())
@@ -2818,6 +2829,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
         continue;
     }
 
+    // Check the cost of inserting the necessary logic before performing the
+    // transformation.
+    if (TTI && Opc != Instruction::BinaryOpsEnd) {
+      Type *Ty = BI->getCondition()->getType();
+      unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
+      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
+          !isa<CmpInst>(PBI->getCondition())))
+        Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
+
+      if (Cost > BranchFoldThreshold)
+        continue;
+    }
+
     LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
     Changed = true;
 
@@ -6013,7 +6037,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
   // branches to us and our successor, fold the comparison into the
   // predecessor and use logical operations to update the incoming value
   // for PHI nodes in common successor.
-  if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
+  if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold))
     return requestResimplify();
   return false;
 }
@@ -6076,7 +6100,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // If this basic block is ONLY a compare and a branch, and if a predecessor
   // branches to us and one of our successors, fold the comparison into the
   // predecessor and use logical operations to pick the right destination.
-  if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
+  if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold))
     return requestResimplify();
 
   // We have a conditional branch to two blocks that are only reachable

diff  --git a/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll
index 2bcbaff50a97..ffb13ca583f7 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll
@@ -169,19 +169,34 @@ cond.end:
 }
 
 define i32 @or_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 {
-; CHECK-LABEL: @or_predicate_minsize(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
-; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
-; CHECK:       cond.false:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
-; CHECK-NEXT:    br label [[COND_END]]
-; CHECK:       cond.end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; THUMB-LABEL: @or_predicate_minsize(
+; THUMB-NEXT:  entry:
+; THUMB-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
+; THUMB-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; THUMB:       lor.lhs.false:
+; THUMB-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; THUMB-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; THUMB-NEXT:    br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
+; THUMB:       cond.false:
+; THUMB-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; THUMB-NEXT:    br label [[COND_END]]
+; THUMB:       cond.end:
+; THUMB-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; THUMB-NEXT:    ret i32 [[COND]]
+;
+; ARM-LABEL: @or_predicate_minsize(
+; ARM-NEXT:  entry:
+; ARM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
+; ARM-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; ARM-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; ARM-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; ARM-NEXT:    br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; ARM:       cond.false:
+; ARM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; ARM-NEXT:    br label [[COND_END]]
+; ARM:       cond.end:
+; ARM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; ARM-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp sgt i32 %d, 3
@@ -202,19 +217,34 @@ cond.end:
 }
 
 define i32 @or_invert_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 {
-; CHECK-LABEL: @or_invert_predicate_minsize(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
-; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
-; CHECK:       cond.false:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
-; CHECK-NEXT:    br label [[COND_END]]
-; CHECK:       cond.end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; THUMB-LABEL: @or_invert_predicate_minsize(
+; THUMB-NEXT:  entry:
+; THUMB-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
+; THUMB-NEXT:    br i1 [[CMP]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]]
+; THUMB:       lor.lhs.false:
+; THUMB-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; THUMB-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; THUMB-NEXT:    br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
+; THUMB:       cond.false:
+; THUMB-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; THUMB-NEXT:    br label [[COND_END]]
+; THUMB:       cond.end:
+; THUMB-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; THUMB-NEXT:    ret i32 [[COND]]
+;
+; ARM-LABEL: @or_invert_predicate_minsize(
+; ARM-NEXT:  entry:
+; ARM-NEXT:    [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3
+; ARM-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; ARM-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; ARM-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; ARM-NEXT:    br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; ARM:       cond.false:
+; ARM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; ARM-NEXT:    br label [[COND_END]]
+; ARM:       cond.end:
+; ARM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; ARM-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp sgt i32 %d, 3
@@ -267,19 +297,33 @@ cond.end:
 }
 
 define i32 @or_xor_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 {
-; CHECK-LABEL: @or_xor_predicate_minsize(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
-; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
-; CHECK:       cond.false:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
-; CHECK-NEXT:    br label [[COND_END]]
-; CHECK:       cond.end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; THUMB-LABEL: @or_xor_predicate_minsize(
+; THUMB-NEXT:  entry:
+; THUMB-NEXT:    br i1 [[CMP:%.*]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]]
+; THUMB:       lor.lhs.false:
+; THUMB-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; THUMB-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; THUMB-NEXT:    br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
+; THUMB:       cond.false:
+; THUMB-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; THUMB-NEXT:    br label [[COND_END]]
+; THUMB:       cond.end:
+; THUMB-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; THUMB-NEXT:    ret i32 [[COND]]
+;
+; ARM-LABEL: @or_xor_predicate_minsize(
+; ARM-NEXT:  entry:
+; ARM-NEXT:    [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
+; ARM-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; ARM-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; ARM-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]]
+; ARM-NEXT:    br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; ARM:       cond.false:
+; ARM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; ARM-NEXT:    br label [[COND_END]]
+; ARM:       cond.end:
+; ARM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; ARM-NEXT:    ret i32 [[COND]]
 ;
 entry:
   br i1 %cmp, label %lor.lhs.false, label %cond.end
@@ -331,19 +375,33 @@ cond.end:
 }
 
 define i32 @and_xor_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 {
-; CHECK-LABEL: @and_xor_minsize(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
-; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]]
-; CHECK:       cond.false:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
-; CHECK-NEXT:    br label [[COND_END]]
-; CHECK:       cond.end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; THUMB-LABEL: @and_xor_minsize(
+; THUMB-NEXT:  entry:
+; THUMB-NEXT:    br i1 [[CMP:%.*]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; THUMB:       lor.lhs.false:
+; THUMB-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; THUMB-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; THUMB-NEXT:    br i1 [[CMP1]], label [[COND_FALSE:%.*]], label [[COND_END]]
+; THUMB:       cond.false:
+; THUMB-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; THUMB-NEXT:    br label [[COND_END]]
+; THUMB:       cond.end:
+; THUMB-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; THUMB-NEXT:    ret i32 [[COND]]
+;
+; ARM-LABEL: @and_xor_minsize(
+; ARM-NEXT:  entry:
+; ARM-NEXT:    [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
+; ARM-NEXT:    [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; ARM-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; ARM-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]]
+; ARM-NEXT:    br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]]
+; ARM:       cond.false:
+; ARM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; ARM-NEXT:    br label [[COND_END]]
+; ARM:       cond.end:
+; ARM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; ARM-NEXT:    ret i32 [[COND]]
 ;
 entry:
   br i1 %cmp, label %cond.end, label %lor.lhs.false


        


More information about the llvm-commits mailing list