[PATCH] D20030: [AArch64] Add option to disable speculation of triangle whose tail is the only latch block

Balaram Makam via llvm-commits llvm-commits at lists.llvm.org
Fri May 6 14:07:37 PDT 2016


bmakam created this revision.
bmakam added reviewers: mcrosier, jmolloy, t.p.northover, llvm-commits.
Herald added subscribers: mcrosier, rengolin, aemerson.

This patch adds an option to disable speculation of a triangle when its
 tail is the only latch block of the loop. For now, the option
 -aarch64-ccmp-disable-triangle-latch is off by default. I'm hoping for feedback
 from others on its profitability on other targets.

 When the tail of the triangle is the only latch block of the loop, we end up inserting
 a ccmp on the critical path of the loop. If the speculated code is cold, we execute
 the cold code on every loop iteration. If the speculated code is hot, the branch
 predictor would take that direction anyway.

 This can reduce the chances of forming a ld/st pair, because the loads may now
 end up in different blocks. However, when tested on Kryo, performance was slightly
 better on the spec2006 CINT/CFP benchmarks, and there were no regressions above the noise range.

http://reviews.llvm.org/D20030

Files:
  lib/Target/AArch64/AArch64ConditionalCompares.cpp
  test/CodeGen/AArch64/aarch64-ccmp-heuristics.ll

Index: test/CodeGen/AArch64/aarch64-ccmp-heuristics.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-ccmp-heuristics.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mcpu=kryo -mtriple=aarch64--linux-gnu -verify-machineinstrs -aarch64-ccmp -aarch64-ccmp-disable-triangle-latch| FileCheck %s
+
+%struct.arc = type { i64, %struct.node*, %struct.node*, i32, %struct.arc*, %struct.arc*, i64, i64 }
+%struct.node = type { i64, i32, %struct.node*, %struct.node*, %struct.node*, %struct.node*, %struct.arc*, %struct.arc*, %struct.arc*, %struct.arc*, i64, i64, i32, i32 }
+%struct.basket = type { %struct.arc*, i64, i64 }
+
+; CHECK: foo
+; CHECK: %if.then34
+; CHECK: cmp x{{[0-9]+}}, #1
+; CHECK-NEXT: b.ge
+; CHECK: %if.then34.if.else.exit
+; CHECK: cmp w{{[0-9]+}}, #2
+; CHECK-NEXT: b.ne
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+  br label %for.body
+
+for.body:                                       ; preds = %for.inc, %entry
+  %arc = phi %struct.arc* [ %add.ptr60, %for.inc ], [ undef, %entry ]
+  %ident32 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 3
+  %ident32.load = load i32, i32* %ident32, align 8
+  %cmp33 = icmp sgt i32 %ident32.load, 0
+  br i1 %cmp33, label %if.then34, label %for.inc
+
+if.then34:                                        ; preds = %for.body
+  %cost35 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 0
+  %0 = load i64, i64* %cost35, align 8
+  %tail36 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 1
+  %1 = load %struct.node*, %struct.node** %tail36, align 8
+  %potential37 = getelementptr inbounds %struct.node, %struct.node* %1, i64 0, i32 0
+  %2 = load i64, i64* %potential37, align 8
+  %sub38 = sub nsw i64 %0, %2
+  %head39 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 2
+  %3= load %struct.node*, %struct.node** %head39, align 8
+  %potential40 = getelementptr inbounds %struct.node, %struct.node* %3, i64 0, i32 0
+  %4 = load i64, i64* %potential40, align 8
+  %add41 = add nsw i64 %4, %sub38
+  %cmp.i = icmp sgt i64 %add41, 0
+  br i1 %cmp.i, label %land.lhs.true.i, label %if.then34.if.else.exit
+
+land.lhs.true.i:                                  ; preds = %if.then34
+  %cmp1.i = icmp eq i32 %ident32.load, 1
+  br i1 %cmp1.i, label %if.then43, label %for.inc
+
+if.then34.if.else.exit:                      ; preds = %if.then34
+  %cmp2.i = icmp sgt i64 %add41, 0
+  %cmp4.i = icmp eq i32 %ident32.load, 2
+  %cmp4.i. = and i1 %cmp4.i, %cmp2.i
+  br i1 %cmp4.i., label %if.then43, label %for.inc
+
+if.then43:                                        ; preds = %if.then34
+  %abs_cost56 = getelementptr inbounds %struct.basket, %struct.basket* undef, i64 0, i32 2
+  br label %for.inc
+
+for.inc:                                        ; preds = %if.then43, %if.then34, %for.body
+  %add.ptr60 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 undef
+  br label %for.body
+}
Index: lib/Target/AArch64/AArch64ConditionalCompares.cpp
===================================================================
--- lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -51,6 +51,12 @@
 static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
                             cl::desc("Turn all knobs to 11"));
 
+// disable speculation of triangle when its tail is the only latch block
+// of this loop.
+static cl::opt<bool> DisableTriangleLatch(
+    "aarch64-ccmp-disable-triangle-latch", cl::init(false), cl::Hidden,
+    cl::desc("Disable when the tail block is a loop latch."));
+
 STATISTIC(NumConsidered, "Number of ccmps considered");
 STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
 STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
@@ -867,6 +873,18 @@
     DEBUG(dbgs() << "Too many instructions to speculate.\n");
     return false;
   }
+
+  // Heuristic: If the tail is the only latch block for this loop then the
+  // compare conversion delays the loop backedge because we now execute ccmp
+  // instruction inside the critical path of the loop.
+  if (DisableTriangleLatch && Loops)
+    if (MachineLoop *ML = Loops->getLoopFor(CmpConv.Head))
+      if (MachineBasicBlock *LatchBB = ML->getLoopLatch())
+        if (LatchBB == CmpConv.Tail) {
+          DEBUG(dbgs() << "Won't speculate when tail block is a loop latch.\n");
+          return false;
+        }
+
   return true;
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20030.56459.patch
Type: text/x-patch
Size: 4497 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160506/e8543908/attachment.bin>


More information about the llvm-commits mailing list