[llvm] r255660 - [SimplifyCFG] allow speculation of exactly one expensive instruction (PR24818)

Tue Dec 15 09:38:32 PST 2015

Author: spatel
Date: Tue Dec 15 11:38:29 2015
New Revision: 255660

URL: http://llvm.org/viewvc/llvm-project?rev=255660&view=rev
Log:
[SimplifyCFG] allow speculation of exactly one expensive instruction (PR24818)

This is the last general step to allow more IR-level speculation with a safety harness in place in CodeGenPrepare.

The intent is to restore the behavior enabled by:
http://reviews.llvm.org/rL228826

but prevent bad performance such as:
https://llvm.org/bugs/show_bug.cgi?id=24818

Earlier patches in this sequence:
D12882 (disable SimplifyCFG speculation for expensive instructions)
D13297 (have CGP despeculate expensive ops)
D14630 (have CGP despeculate special versions of cttz/ctlz)

As shown in the test cases, we only have two instructions currently affected: ctz for some x86 and fdiv generally. 
Allowing exactly one expensive instruction is a bit of a hack, but it lines up with what is currently implemented
in CGP. If we make the despeculation more general in CGP, we can make the speculation here more liberal.

A follow-up patch will adjust the cost for sqrt and possibly other typically expensive math intrinsics (currently
everything is cheap by default). GPU targets would likely want to override those expensive default costs (just as
they probably should already override the cost of div/rem) because just about any math is cheaper than control-flow
on those targets.

Differential Revision: http://reviews.llvm.org/D15213


Modified:
    llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
    llvm/trunk/test/Transforms/SimplifyCFG/speculate-math.ll

Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp?rev=255660&r1=255659&r2=255660&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Tue Dec 15 11:38:29 2015
@@ -84,6 +84,11 @@ static cl::opt<bool> MergeCondStoresAggr
     cl::desc("When merging conditional stores, do so even if the resultant "
              "basic blocks are unlikely to be if-converted as a result"));
 
+static cl::opt<bool> SpeculateOneExpensiveInst(
+    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
+    cl::desc("Allow exactly one expensive instruction to be speculatively "
+             "executed"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
@@ -259,7 +264,8 @@ static unsigned ComputeSpeculationCost(c
 static bool DominatesMergePoint(Value *V, BasicBlock *BB,
                                 SmallPtrSetImpl<Instruction*> *AggressiveInsts,
                                 unsigned &CostRemaining,
-                                const TargetTransformInfo &TTI) {
+                                const TargetTransformInfo &TTI,
+                                unsigned Depth = 0) {
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) {
     // Non-instructions all dominate instructions, but not all constantexprs
@@ -297,15 +303,24 @@ static bool DominatesMergePoint(Value *V
 
   unsigned Cost = ComputeSpeculationCost(I, TTI);
 
-  if (Cost > CostRemaining)
+  // Allow exactly one instruction to be speculated regardless of its cost
+  // (as long as it is safe to do so).
+  // This is intended to flatten the CFG even if the instruction is a division
+  // or other expensive operation. The speculation of an expensive instruction
+  // is expected to be undone in CodeGenPrepare if the speculation has not
+  // enabled further IR optimizations.
+  if (Cost > CostRemaining &&
+      (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0))
     return false;
 
-  CostRemaining -= Cost;
+  // Avoid unsigned wrap.
+  CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
 
   // Okay, we can only really hoist these out if their operands do
   // not take us over the cost threshold.
   for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
-    if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI))
+    if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
+                             Depth + 1))
       return false;
   // Okay, it's safe to do this!  Remember this instruction.
   AggressiveInsts->insert(I);

Modified: llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll?rev=255660&r1=255659&r2=255660&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll Tue Dec 15 11:38:29 2015
@@ -7,9 +7,7 @@ define i64 @test1(i64 %A) {
 ; ALL-LABEL: @test1(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
 ; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i64 %A, 0
@@ -28,9 +26,7 @@ define i32 @test2(i32 %A) {
 ; ALL-LABEL: @test2(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
 ; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i32 %A, 0
@@ -50,9 +46,7 @@ define signext i16 @test3(i16 signext %A
 ; ALL-LABEL: @test3(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
 ; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i16 %A, 0
@@ -72,9 +66,7 @@ define i64 @test1b(i64 %A) {
 ; ALL-LABEL: @test1b(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
 ; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i64 %A, 0
@@ -94,9 +86,7 @@ define i32 @test2b(i32 %A) {
 ; ALL-LABEL: @test2b(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
 ; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i32 %A, 0
@@ -116,9 +106,7 @@ define signext i16 @test3b(i16 signext %
 ; ALL-LABEL: @test3b(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
 ; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i16 %A, 0

Modified: llvm/trunk/test/Transforms/SimplifyCFG/speculate-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/speculate-math.ll?rev=255660&r1=255659&r2=255660&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/speculate-math.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyCFG/speculate-math.ll Tue Dec 15 11:38:29 2015
@@ -1,4 +1,5 @@
-; RUN: opt -S -simplifycfg -phi-node-folding-threshold=2 < %s | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s --check-prefix=EXPENSIVE --check-prefix=ALL
+; RUN: opt -S -simplifycfg -speculate-one-expensive-inst=false < %s | FileCheck %s --check-prefix=CHEAP --check-prefix=ALL
 
 declare float @llvm.sqrt.f32(float) nounwind readonly
 declare float @llvm.fma.f32(float, float, float) nounwind readonly
@@ -7,19 +8,10 @@ declare float @llvm.fabs.f32(float) noun
 declare float @llvm.minnum.f32(float, float) nounwind readonly
 declare float @llvm.maxnum.f32(float, float) nounwind readonly
 
-; FIXME: This is intended to be a temporary test. As discussed in 
-; D12882, we actually do want to speculate even expensive operations
-; in SimplifyCFG because it can expose more optimizations for other
-; passes. Therefore, we either need to adjust SimplifyCFG's 
-; calculations that use the TTI cost model or use a different cost
-; model for deciding which ops should be speculated in SimplifyCFG. 
-; We should also be using the TTI cost model later - for example in
-; CodeGenPrepare - to potentially undo this speculation.
+; ALL-LABEL: @fdiv_test(
+; EXPENSIVE: select i1 %cmp, double %div, double 0.0
+; CHEAP-NOT: select
 
-; Do not speculate fdiv by default because it is generally expensive. 
-
-; CHECK-LABEL: @fdiv_test(
-; CHECK-NOT: select
 define double @fdiv_test(double %a, double %b) {
 entry:
   %cmp = fcmp ogt double %a, 0.0
@@ -34,8 +26,8 @@ cond.end:
   ret double %cond
 }
 
-; CHECK-LABEL: @sqrt_test(
-; CHECK: select
+; ALL-LABEL: @sqrt_test(
+; ALL: select
 define void @sqrt_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
 entry:
   %cmp.i = fcmp olt float %a, 0.000000e+00
@@ -51,8 +43,8 @@ test_sqrt.exit:
   ret void
 }
 
-; CHECK-LABEL: @fabs_test(
-; CHECK: select
+; ALL-LABEL: @fabs_test(
+; ALL: select
 define void @fabs_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
 entry:
   %cmp.i = fcmp olt float %a, 0.000000e+00
@@ -68,8 +60,8 @@ test_fabs.exit:
   ret void
 }
 
-; CHECK-LABEL: @fma_test(
-; CHECK: select
+; ALL-LABEL: @fma_test(
+; ALL: select
 define void @fma_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
 entry:
   %cmp.i = fcmp olt float %a, 0.000000e+00
@@ -85,8 +77,8 @@ test_fma.exit:
   ret void
 }
 
-; CHECK-LABEL: @fmuladd_test(
-; CHECK: select
+; ALL-LABEL: @fmuladd_test(
+; ALL: select
 define void @fmuladd_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
 entry:
   %cmp.i = fcmp olt float %a, 0.000000e+00
@@ -102,8 +94,8 @@ test_fmuladd.exit:
   ret void
 }
 
-; CHECK-LABEL: @minnum_test(
-; CHECK: select
+; ALL-LABEL: @minnum_test(
+; ALL: select
 define void @minnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
 entry:
   %cmp.i = fcmp olt float %a, 0.000000e+00
@@ -119,8 +111,8 @@ test_minnum.exit:
   ret void
 }
 
-; CHECK-LABEL: @maxnum_test(
-; CHECK: select
+; ALL-LABEL: @maxnum_test(
+; ALL: select
 define void @maxnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
 entry:
   %cmp.i = fcmp olt float %a, 0.000000e+00