[llvm] 65f78e7 - [SimplifyCFG] Consider cost of combining predicates.
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 7 02:08:06 PDT 2020
Author: Sam Parker
Date: 2020-09-07T10:04:50+01:00
New Revision: 65f78e73ad574bb73bb625c787850acd261ba53a
URL: https://github.com/llvm/llvm-project/commit/65f78e73ad574bb73bb625c787850acd261ba53a
DIFF: https://github.com/llvm/llvm-project/commit/65f78e73ad574bb73bb625c787850acd261ba53a.diff
LOG: [SimplifyCFG] Consider cost of combining predicates.
Modify FoldBranchToCommonDest to consider the cost of inserting
instructions when attempting to combine predicates to fold blocks.
The threshold can be controlled via a new option:
-simplifycfg-branch-fold-threshold which defaults to '2' to allow
the insertion of a not and another logical operator.
Differential Revision: https://reviews.llvm.org/D86526
Added:
Modified:
llvm/include/llvm/Transforms/Utils/Local.h
llvm/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 5ab2dd496282..fb6f0269a0ac 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -199,6 +199,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
/// branches to us and one of our successors, fold the setcc into the
/// predecessor and use logical operations to pick the right destination.
bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr,
+ const TargetTransformInfo *TTI = nullptr,
unsigned BonusInstThreshold = 1);
/// This function takes a virtual register computed by an Instruction and
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index ae2471969160..124a7c423e72 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -143,6 +143,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
cl::desc("Max size of a block which is still considered "
"small enough to thread through"));
+// Two is chosen to allow one negation and a logical combine.
+static cl::opt<unsigned>
+ BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
+ cl::init(2),
+ cl::desc("Maximum cost of combining conditions when "
+ "folding branches"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -2684,12 +2691,16 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
+ const TargetTransformInfo *TTI,
unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
const unsigned PredCount = pred_size(BB);
bool Changed = false;
+ TargetTransformInfo::TargetCostKind CostKind =
+ BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
Instruction *Cond = nullptr;
if (BI->isConditional())
@@ -2818,6 +2829,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
continue;
}
+ // Check the cost of inserting the necessary logic before performing the
+ // transformation.
+ if (TTI && Opc != Instruction::BinaryOpsEnd) {
+ Type *Ty = BI->getCondition()->getType();
+ unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
+ if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
+ !isa<CmpInst>(PBI->getCondition())))
+ Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
+
+ if (Cost > BranchFoldThreshold)
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
Changed = true;
@@ -6013,7 +6037,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold))
return requestResimplify();
return false;
}
@@ -6076,7 +6100,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold))
return requestResimplify();
// We have a conditional branch to two blocks that are only reachable
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll
index 2bcbaff50a97..ffb13ca583f7 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll
@@ -169,19 +169,34 @@ cond.end:
}
define i32 @or_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 {
-; CHECK-LABEL: @or_predicate_minsize(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
-; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
-; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
-; CHECK: cond.false:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
-; CHECK-NEXT: br label [[COND_END]]
-; CHECK: cond.end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; THUMB-LABEL: @or_predicate_minsize(
+; THUMB-NEXT: entry:
+; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
+; THUMB-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; THUMB: lor.lhs.false:
+; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
+; THUMB: cond.false:
+; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; THUMB-NEXT: br label [[COND_END]]
+; THUMB: cond.end:
+; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; THUMB-NEXT: ret i32 [[COND]]
+;
+; ARM-LABEL: @or_predicate_minsize(
+; ARM-NEXT: entry:
+; ARM-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
+; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; ARM: cond.false:
+; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; ARM-NEXT: br label [[COND_END]]
+; ARM: cond.end:
+; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; ARM-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp sgt i32 %d, 3
@@ -202,19 +217,34 @@ cond.end:
}
define i32 @or_invert_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 {
-; CHECK-LABEL: @or_invert_predicate_minsize(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
-; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
-; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
-; CHECK: cond.false:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
-; CHECK-NEXT: br label [[COND_END]]
-; CHECK: cond.end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; THUMB-LABEL: @or_invert_predicate_minsize(
+; THUMB-NEXT: entry:
+; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3
+; THUMB-NEXT: br i1 [[CMP]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]]
+; THUMB: lor.lhs.false:
+; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
+; THUMB: cond.false:
+; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; THUMB-NEXT: br label [[COND_END]]
+; THUMB: cond.end:
+; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; THUMB-NEXT: ret i32 [[COND]]
+;
+; ARM-LABEL: @or_invert_predicate_minsize(
+; ARM-NEXT: entry:
+; ARM-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3
+; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; ARM: cond.false:
+; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; ARM-NEXT: br label [[COND_END]]
+; ARM: cond.end:
+; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; ARM-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp sgt i32 %d, 3
@@ -267,19 +297,33 @@ cond.end:
}
define i32 @or_xor_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 {
-; CHECK-LABEL: @or_xor_predicate_minsize(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
-; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
-; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
-; CHECK: cond.false:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
-; CHECK-NEXT: br label [[COND_END]]
-; CHECK: cond.end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; THUMB-LABEL: @or_xor_predicate_minsize(
+; THUMB-NEXT: entry:
+; THUMB-NEXT: br i1 [[CMP:%.*]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]]
+; THUMB: lor.lhs.false:
+; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]]
+; THUMB: cond.false:
+; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; THUMB-NEXT: br label [[COND_END]]
+; THUMB: cond.end:
+; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; THUMB-NEXT: ret i32 [[COND]]
+;
+; ARM-LABEL: @or_xor_predicate_minsize(
+; ARM-NEXT: entry:
+; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
+; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]]
+; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; ARM: cond.false:
+; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; ARM-NEXT: br label [[COND_END]]
+; ARM: cond.end:
+; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; ARM-NEXT: ret i32 [[COND]]
;
entry:
br i1 %cmp, label %lor.lhs.false, label %cond.end
@@ -331,19 +375,33 @@ cond.end:
}
define i32 @and_xor_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 {
-; CHECK-LABEL: @and_xor_minsize(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
-; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
-; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]]
-; CHECK: cond.false:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
-; CHECK-NEXT: br label [[COND_END]]
-; CHECK: cond.end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; THUMB-LABEL: @and_xor_minsize(
+; THUMB-NEXT: entry:
+; THUMB-NEXT: br i1 [[CMP:%.*]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; THUMB: lor.lhs.false:
+; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; THUMB-NEXT: br i1 [[CMP1]], label [[COND_FALSE:%.*]], label [[COND_END]]
+; THUMB: cond.false:
+; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; THUMB-NEXT: br label [[COND_END]]
+; THUMB: cond.end:
+; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; THUMB-NEXT: ret i32 [[COND]]
+;
+; ARM-LABEL: @and_xor_minsize(
+; ARM-NEXT: entry:
+; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true
+; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]]
+; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]]
+; ARM-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]]
+; ARM-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]]
+; ARM: cond.false:
+; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; ARM-NEXT: br label [[COND_END]]
+; ARM: cond.end:
+; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; ARM-NEXT: ret i32 [[COND]]
;
entry:
br i1 %cmp, label %cond.end, label %lor.lhs.false
More information about the llvm-commits
mailing list