[PATCH] D67281: [AArch64][SimplifyCFG] Add additional cost for instructions in mergeConditionalStoreToAddress
Pavel Kosov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 20 05:24:22 PST 2019
kpdev42 updated this revision to Diff 230235.
kpdev42 added a comment.
I changed my patch, now before merging blocks it calculates existing instructions in DomBB,
just to be sure that this block is not too big already
Maybe instead of using phi-node-folding threshold it is needed to use some other separate threshold.
And also it is still not clear, if we need to optimize this in a middle end or in a backend.
Maybe backend will be more proper place for it. @dmgreen please feel free to bring a light to this problem
P.S.: In this change I also simplified a testcase
---------------------------------------------------------------------------------------------
Recall, what exactly I trying to eliminate:
clang merges too big basic blocks and it leads to performance regression
Before merging:
tst x18, x1
b.eq .LBB0_10
.LBB0_9:
orr x16, x16, x18
add w0, w0, #1
str xzr, [x13, #56]
.LBB0_10:
cbz x11, .LBB0_7
After merging
and x2, x18, x1
orr x3, x16, x18
tst x18, x1
orr x2, x2, x11
cinc w0, w0, ne
csel x16, x16, x3, eq
cbz x2, .LBB0_7
------------------------------------------------------------------------------------------------
Similar picture in test case:
Before merging:
for.body.lr.ph: ; preds = %entry
%0 = load %struct.ptr_wrapper*, %struct.ptr_wrapper** @g_wrapper, align 8
%proc = getelementptr inbounds %struct.ptr_wrapper, %struct.ptr_wrapper* %0, i64 0, i32 1
%and2 = and i64 %mask, 1
%tobool3 = icmp eq i64 %and2, 0
%and = and i64 %bit, %in
%tobool = icmp eq i64 %and, 0
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %for.body.lr.ph
%or = or i64 0, %bit
%inc = add nsw i32 0, 1
br label %if.end
if.end: ; preds = %if.then, %for.body.lr.ph
%retval1.1 = phi i32 [ %inc, %if.then ], [ 0, %for.body.lr.ph ]
%res_in.1 = phi i64 [ %or, %if.then ], [ 0, %for.body.lr.ph ]
%1 = xor i1 %tobool, true
%2 = xor i1 %tobool3, true
%3 = or i1 %1, %2
store i8* null, i8** %proc, align 8
%shl = shl i64 %bit, 1
%cmp = icmp eq i64 %shl, 0
br label %for.end
After merging:
for.body.lr.ph: ; preds = %entry
%0 = load %struct.ptr_wrapper*, %struct.ptr_wrapper** @g_wrapper, align 8
%proc = getelementptr inbounds %struct.ptr_wrapper, %struct.ptr_wrapper* %0, i64 0, i32 1
%and2 = and i64 %mask, 1
%tobool3 = icmp eq i64 %and2, 0
%and = and i64 %bit, %in
%tobool = icmp eq i64 %and, 0
%or = or i64 0, %bit
%inc = add nsw i32 0, 1
%retval1.1 = select i1 %tobool, i32 0, i32 %inc
%res_in.1 = select i1 %tobool, i64 0, i64 %or
%1 = xor i1 %tobool, true
%2 = xor i1 %tobool3, true
%3 = or i1 %1, %2
store i8* null, i8** %proc, align 8
%shl = shl i64 %bit, 1
%cmp = icmp eq i64 %shl, 0
br label %for.end
------------------------------------------------------------------------------------------------
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D67281/new/
https://reviews.llvm.org/D67281
Files:
llvm/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/test/Transforms/SimplifyCFG/AArch64/check-instr-cost-for-folding.ll
Index: llvm/test/Transforms/SimplifyCFG/AArch64/check-instr-cost-for-folding.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SimplifyCFG/AArch64/check-instr-cost-for-folding.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -simplifycfg-db-instr-count=true -mtriple=aarch64-linux-gnu -simplifycfg -S >%t
+; RUN: FileCheck %s < %t
+; ModuleID = 'test_func.c'
+
+%struct.ptr_wrapper = type { i32*, i8* }
+
+ at g_wrapper = external dso_local local_unnamed_addr global %struct.ptr_wrapper*, align 8
+
+define dso_local i32 @test_func(i64 %in, i64 %bit, i64 %mask) local_unnamed_addr {
+entry:
+ %cmp16 = icmp eq i64 %bit, 0
+ br i1 %cmp16, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+; CHECK-LABEL: for.body.lr.ph:
+; CHECK-NOT: select
+ %0 = load %struct.ptr_wrapper*, %struct.ptr_wrapper** @g_wrapper, align 8
+ %proc = getelementptr inbounds %struct.ptr_wrapper, %struct.ptr_wrapper* %0, i64 0, i32 1
+ %and2 = and i64 %mask, 1
+ %tobool3 = icmp eq i64 %and2, 0
+ %and = and i64 %bit, %in
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %for.body.lr.ph
+ %or = or i64 0, %bit
+ %inc = add nsw i32 0, 1
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body.lr.ph
+ %retval1.1 = phi i32 [ %inc, %if.then ], [ 0, %for.body.lr.ph ]
+ %res_in.1 = phi i64 [ %or, %if.then ], [ 0, %for.body.lr.ph ]
+ %1 = xor i1 %tobool, true
+ %2 = xor i1 %tobool3, true
+ %3 = or i1 %1, %2
+ store i8* null, i8** %proc, align 8
+ %shl = shl i64 %bit, 1
+ %cmp = icmp eq i64 %shl, 0
+ br label %for.end
+
+for.end: ; preds = %if.end, %entry
+ %retval1.0.lcssa = phi i32 [ 0, %entry ], [ %retval1.1, %if.end ]
+ %res_in.0.lcssa = phi i64 [ 0, %entry ], [ %res_in.1, %if.end ]
+ %4 = trunc i64 %res_in.0.lcssa to i32
+ %conv7 = add i32 %retval1.0.lcssa, %4
+ ret i32 %conv7
+}
Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -101,6 +101,10 @@
"to speculatively execute to fold a 2-entry PHI node into a "
"select (default = 4)"));
+static cl::opt<bool> CheckDomBlockInstructionsCount(
+ "simplifycfg-db-instr-count", cl::Hidden, cl::init(false),
+ cl::desc("Do not merge BBs if domblock already has more than phi-node-folding-threshold instructions"));
+
static cl::opt<bool> DupRet(
"simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
@@ -2413,6 +2417,25 @@
<< " T: " << IfTrue->getName()
<< " F: " << IfFalse->getName() << "\n");
+ // We need to be sure, that DomBlock has
+ // enough room for new instructions
+ // First add cost of Select instruction, that will be added to this block
+ // (this cost is equal to number of phi nodes in BB)
+ unsigned Cost = NumPhis;
+
+ if (CheckDomBlockInstructionsCount) {
+ for (const auto& Instr : *DomBlock)
+ {
+ if (!isa<BranchInst>(&Instr))
+ Cost += TTI.getUserCost(&Instr);
+ }
+
+ if (Cost > PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) {
+ // DomBlock already too large
+ return false;
+ }
+ }
+
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
Instruction *InsertPt = DomBlock->getTerminator();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D67281.230235.patch
Type: text/x-patch
Size: 3687 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191120/57e5265c/attachment-0001.bin>
More information about the llvm-commits
mailing list