[PATCH] D20030: [AArch64] Add option to disable speculation of triangle whose tail is the only latch block
Balaram Makam via llvm-commits
llvm-commits at lists.llvm.org
Fri May 6 14:07:37 PDT 2016
bmakam created this revision.
bmakam added reviewers: mcrosier, jmolloy, t.p.northover, llvm-commits.
Herald added subscribers: mcrosier, rengolin, aemerson.
This patch adds an option to disable speculation of a triangle when its
tail is the only latch block of the enclosing loop. The option,
-aarch64-ccmp-disable-triangle-latch, is currently off by default. I'm hoping for feedback
from others on its profitability on other targets.
When the tail of the triangle is the only latch block of the loop, we end up inserting a ccmp
on the critical path of the loop. If the speculated code is cold, we still execute
it on every loop iteration. If the speculated code is hot, the branch
predictor would predict that direction anyway.
This can hurt the chances of forming a ld/st pair, because the loads may now
end up in different blocks. However, when tested on Kryo, performance was slightly
better on the SPEC2006 CINT/CFP benchmarks, with no regressions above the noise range.
http://reviews.llvm.org/D20030
Files:
lib/Target/AArch64/AArch64ConditionalCompares.cpp
test/CodeGen/AArch64/aarch64-ccmp-heuristics.ll
Index: test/CodeGen/AArch64/aarch64-ccmp-heuristics.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-ccmp-heuristics.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mcpu=kryo -mtriple=aarch64--linux-gnu -verify-machineinstrs -aarch64-ccmp -aarch64-ccmp-disable-triangle-latch| FileCheck %s
+
+%struct.arc = type { i64, %struct.node*, %struct.node*, i32, %struct.arc*, %struct.arc*, i64, i64 }
+%struct.node = type { i64, i32, %struct.node*, %struct.node*, %struct.node*, %struct.node*, %struct.arc*, %struct.arc*, %struct.arc*, %struct.arc*, i64, i64, i32, i32 }
+%struct.basket = type { %struct.arc*, i64, i64 }
+
+; CHECK: foo
+; CHECK: %if.then34
+; CHECK: cmp x{{[0-9]+}}, #1
+; CHECK-NEXT: b.ge
+; CHECK: %if.then34.if.else.exit
+; CHECK: cmp w{{[0-9]+}}, #2
+; CHECK-NEXT: b.ne
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %arc = phi %struct.arc* [ %add.ptr60, %for.inc ], [ undef, %entry ]
+ %ident32 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 3
+ %ident32.load = load i32, i32* %ident32, align 8
+ %cmp33 = icmp sgt i32 %ident32.load, 0
+ br i1 %cmp33, label %if.then34, label %for.inc
+
+if.then34: ; preds = %for.body
+ %cost35 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 0
+ %0 = load i64, i64* %cost35, align 8
+ %tail36 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 1
+ %1 = load %struct.node*, %struct.node** %tail36, align 8
+ %potential37 = getelementptr inbounds %struct.node, %struct.node* %1, i64 0, i32 0
+ %2 = load i64, i64* %potential37, align 8
+ %sub38 = sub nsw i64 %0, %2
+ %head39 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 2
+ %3= load %struct.node*, %struct.node** %head39, align 8
+ %potential40 = getelementptr inbounds %struct.node, %struct.node* %3, i64 0, i32 0
+ %4 = load i64, i64* %potential40, align 8
+ %add41 = add nsw i64 %4, %sub38
+ %cmp.i = icmp sgt i64 %add41, 0
+ br i1 %cmp.i, label %land.lhs.true.i, label %if.then34.if.else.exit
+
+land.lhs.true.i: ; preds = %if.then34
+ %cmp1.i = icmp eq i32 %ident32.load, 1
+ br i1 %cmp1.i, label %if.then43, label %for.inc
+
+if.then34.if.else.exit: ; preds = %if.then34
+ %cmp2.i = icmp sgt i64 %add41, 0
+ %cmp4.i = icmp eq i32 %ident32.load, 2
+ %cmp4.i. = and i1 %cmp4.i, %cmp2.i
+ br i1 %cmp4.i., label %if.then43, label %for.inc
+
+if.then43: ; preds = %if.then34.if.else.exit, %land.lhs.true.i
+ %abs_cost56 = getelementptr inbounds %struct.basket, %struct.basket* undef, i64 0, i32 2
+ br label %for.inc
+
+for.inc: ; preds = %if.then43, %if.then34.if.else.exit, %land.lhs.true.i, %for.body
+ %add.ptr60 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 undef
+ br label %for.body
+}
Index: lib/Target/AArch64/AArch64ConditionalCompares.cpp
===================================================================
--- lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -51,6 +51,12 @@
static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
cl::desc("Turn all knobs to 11"));
+// Opt-in heuristic: do not speculate a triangle whose tail is the only latch
+// block of its loop, since the ccmp would land on the loop's critical path.
+static cl::opt<bool> DisableTriangleLatch(
+ "aarch64-ccmp-disable-triangle-latch", cl::init(false), cl::Hidden,
+ cl::desc("Disable when the tail block is a loop latch."));
+
STATISTIC(NumConsidered, "Number of ccmps considered");
STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
@@ -867,6 +873,18 @@
DEBUG(dbgs() << "Too many instructions to speculate.\n");
return false;
}
+
+ // Heuristic: If the tail is the only latch block for this loop then the
+ // compare conversion delays the loop backedge because we now execute ccmp
+ // instruction inside the critical path of the loop.
+ if (DisableTriangleLatch && Loops)
+ if (MachineLoop *ML = Loops->getLoopFor(CmpConv.Head))
+ if (MachineBasicBlock *LatchBB = ML->getLoopLatch())
+ if (LatchBB == CmpConv.Tail) {
+ DEBUG(dbgs() << "Won't speculate when tail block is a loop latch.\n");
+ return false;
+ }
+
return true;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20030.56459.patch
Type: text/x-patch
Size: 4497 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160506/e8543908/attachment.bin>
More information about the llvm-commits
mailing list