[llvm] r250743 - [CGP] transform select instructions into branches and sink expensive operands

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 19 14:59:13 PDT 2015


Author: spatel
Date: Mon Oct 19 16:59:12 2015
New Revision: 250743

URL: http://llvm.org/viewvc/llvm-project?rev=250743&view=rev
Log:
[CGP] transform select instructions into branches and sink expensive operands

This was originally checked in at r250527, but reverted at r250570 because of PR25222.
There were at least 2 problems: 
1. The cost check was checking for an instruction with an exact cost of TCC_Expensive;
that should have been >=.
2. The cause of the clang stage 1 failures was illegally sinking 'call' instructions;
we can't sink instructions that may have side effects / are not safe to execute speculatively.

Fixed those conditions in sinkSelectOperand() and added test cases.

Original commit message:
This is a follow-up to the discussion in D12882.

Ideally, we would like SimplifyCFG to be able to form select instructions even when the operands
are expensive (as defined by the TTI cost model) because that may expose further optimizations.
However, we would then like a later pass like CodeGenPrepare to undo that transformation if the
target would likely benefit from not speculatively executing an expensive op (this patch).

Once we have this safety mechanism in place, we can adjust SimplifyCFG to restore its
select-formation behavior that changed with r248439.

Differential Revision: http://reviews.llvm.org/D13297

Added:
    llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll
Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=250743&r1=250742&r2=250743&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Mon Oct 19 16:59:12 2015
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -3846,8 +3847,22 @@ bool CodeGenPrepare::optimizeExtUses(Ins
   return MadeChange;
 }
 
+/// Check if V (an operand of a select instruction) is an expensive instruction
+/// that is only used once.
+static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
+  auto *I = dyn_cast<Instruction>(V);
+  // If it's safe to speculatively execute, then it should not have side
+  // effects; therefore, it's safe to sink and possibly *not* execute.
+  if (I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
+      TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive)
+    return true;
+
+  return false;
+}
+
 /// Returns true if a SelectInst should be turned into an explicit branch.
-static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+                                                SelectInst *SI) {
   // FIXME: This should use the same heuristics as IfConversion to determine
   // whether a select is better represented as a branch.  This requires that
   // branch probability metadata is preserved for the select, which is not the
@@ -3868,8 +3883,17 @@ static bool isFormingBranchFromSelectPro
   // on a load from memory. But if the load is used more than once, do not
   // change the select to a branch because the load is probably needed
   // regardless of whether the branch is taken or not.
-  return ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
-          (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+  if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+      (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()))
+    return true;
+
+  // If either operand of the select is expensive and only needed on one side
+  // of the select, we should form a branch.
+  if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
+      sinkSelectOperand(TTI, SI->getFalseValue()))
+    return true;
+
+  return false;
 }
 
 
@@ -3895,34 +3919,97 @@ bool CodeGenPrepare::optimizeSelectInst(
     // We have efficient codegen support for the select instruction.
     // Check if it is profitable to keep this 'select'.
     if (!TLI->isPredictableSelectExpensive() ||
-        !isFormingBranchFromSelectProfitable(SI))
+        !isFormingBranchFromSelectProfitable(TTI, SI))
       return false;
   }
 
   ModifiedDT = true;
 
+  // Transform a sequence like this:
+  //    start:
+  //       %cmp = cmp uge i32 %a, %b
+  //       %sel = select i1 %cmp, i32 %c, i32 %d
+  //
+  // Into:
+  //    start:
+  //       %cmp = cmp uge i32 %a, %b
+  //       br i1 %cmp, label %select.true, label %select.false
+  //    select.true:
+  //       br label %select.end
+  //    select.false:
+  //       br label %select.end
+  //    select.end:
+  //       %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+  //
+  // In addition, we may sink instructions that produce %c or %d from
+  // the entry block into the destination(s) of the new branch.
+  // If the true or false blocks do not contain a sunken instruction, that
+  // block and its branch may be optimized away. In that case, one side of the
+  // first branch will point directly to select.end, and the corresponding PHI
+  // predecessor block will be the start block.
+
   // First, we split the block containing the select into 2 blocks.
   BasicBlock *StartBlock = SI->getParent();
   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
-  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+  BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
 
-  // Create a new block serving as the landing pad for the branch.
-  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
-                                             NextBlock->getParent(), NextBlock);
-
-  // Move the unconditional branch from the block with the select in it into our
-  // landing pad block.
+  // Delete the unconditional branch that was just created by the split.
   StartBlock->getTerminator()->eraseFromParent();
-  BranchInst::Create(NextBlock, SmallBlock);
+
+  // These are the new basic blocks for the conditional branch.
+  // At least one will become an actual new basic block.
+  BasicBlock *TrueBlock = nullptr;
+  BasicBlock *FalseBlock = nullptr;
+
+  // Sink expensive instructions into the conditional blocks to avoid executing
+  // them speculatively.
+  if (sinkSelectOperand(TTI, SI->getTrueValue())) {
+    TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
+                                   EndBlock->getParent(), EndBlock);
+    auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+    auto *TrueInst = cast<Instruction>(SI->getTrueValue());
+    TrueInst->moveBefore(TrueBranch);
+  }
+  if (sinkSelectOperand(TTI, SI->getFalseValue())) {
+    FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
+                                    EndBlock->getParent(), EndBlock);
+    auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+    auto *FalseInst = cast<Instruction>(SI->getFalseValue());
+    FalseInst->moveBefore(FalseBranch);
+  }
+
+  // If there was nothing to sink, then arbitrarily choose the 'false' side
+  // for a new input value to the PHI.
+  if (TrueBlock == FalseBlock) {
+    assert(TrueBlock == nullptr &&
+           "Unexpected basic block transform while optimizing select");
+
+    FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+                                    EndBlock->getParent(), EndBlock);
+    BranchInst::Create(EndBlock, FalseBlock);
+  }
 
   // Insert the real conditional branch based on the original condition.
-  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+  // If we did not create a new block for one of the 'true' or 'false' paths
+  // of the condition, it means that side of the branch goes to the end block
+  // directly and the path originates from the start block from the point of
+  // view of the new PHI.
+  if (TrueBlock == nullptr) {
+    BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI);
+    TrueBlock = StartBlock;
+  } else if (FalseBlock == nullptr) {
+    BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI);
+    FalseBlock = StartBlock;
+  } else {
+    BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI);
+  }
 
   // The select itself is replaced with a PHI Node.
-  PHINode *PN = PHINode::Create(SI->getType(), 2, "", &NextBlock->front());
+  PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
   PN->takeName(SI);
-  PN->addIncoming(SI->getTrueValue(), StartBlock);
-  PN->addIncoming(SI->getFalseValue(), SmallBlock);
+  PN->addIncoming(SI->getTrueValue(), TrueBlock);
+  PN->addIncoming(SI->getFalseValue(), FalseBlock);
+
   SI->replaceAllUsesWith(PN);
   SI->eraseFromParent();
 

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll?rev=250743&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll Mon Oct 19 16:59:12 2015
@@ -0,0 +1,141 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+; Nothing to sink here, but this gets converted to a branch to
+; avoid stalling an out-of-order CPU on a predictable branch.
+
+define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y)  {
+entry:
+  %load = load double, double* %b, align 8
+  %cmp = fcmp olt double %load, %a
+  %sel = select i1 %cmp, i32 %x, i32 %y
+  ret i32 %sel
+
+; CHECK-LABEL: @no_sink(
+; CHECK:    %load = load double, double* %b, align 8
+; CHECK:    %cmp = fcmp olt double %load, %a
+; CHECK:    br i1 %cmp, label %select.end, label %select.false
+; CHECK:  select.false:
+; CHECK:    br label %select.end
+; CHECK:  select.end:
+; CHECK:    %sel = phi i32 [ %x, %entry ], [ %y, %select.false ] 
+; CHECK:    ret i32 %sel
+}
+
+
+; An 'fdiv' is expensive, so sink it rather than speculatively execute it.
+
+define float @fdiv_true_sink(float %a, float %b) {
+entry:
+  %div = fdiv float %a, %b
+  %cmp = fcmp ogt float %a, 1.0
+  %sel = select i1 %cmp, float %div, float 2.0
+  ret float %sel
+
+; CHECK-LABEL: @fdiv_true_sink(
+; CHECK:    %cmp = fcmp ogt float %a, 1.0
+; CHECK:    br i1 %cmp, label %select.true.sink, label %select.end
+; CHECK:  select.true.sink:
+; CHECK:    %div = fdiv float %a, %b
+; CHECK:    br label %select.end
+; CHECK:  select.end:
+; CHECK:    %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ]
+; CHECK:    ret float %sel
+}
+
+define float @fdiv_false_sink(float %a, float %b) {
+entry:
+  %div = fdiv float %a, %b
+  %cmp = fcmp ogt float %a, 3.0
+  %sel = select i1 %cmp, float 4.0, float %div
+  ret float %sel
+
+; CHECK-LABEL: @fdiv_false_sink(
+; CHECK:    %cmp = fcmp ogt float %a, 3.0
+; CHECK:    br i1 %cmp, label %select.end, label %select.false.sink
+; CHECK:  select.false.sink:
+; CHECK:    %div = fdiv float %a, %b
+; CHECK:    br label %select.end
+; CHECK:  select.end:
+; CHECK:    %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ] 
+; CHECK:    ret float %sel
+}
+
+define float @fdiv_both_sink(float %a, float %b) {
+entry:
+  %div1 = fdiv float %a, %b
+  %div2 = fdiv float %b, %a
+  %cmp = fcmp ogt float %a, 5.0
+  %sel = select i1 %cmp, float %div1, float %div2
+  ret float %sel
+
+; CHECK-LABEL: @fdiv_both_sink(
+; CHECK:    %cmp = fcmp ogt float %a, 5.0
+; CHECK:    br i1 %cmp, label %select.true.sink, label %select.false.sink
+; CHECK:  select.true.sink:
+; CHECK:    %div1 = fdiv float %a, %b
+; CHECK:    br label %select.end
+; CHECK:  select.false.sink:
+; CHECK:    %div2 = fdiv float %b, %a
+; CHECK:    br label %select.end
+; CHECK:  select.end:
+; CHECK:    %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ] 
+; CHECK:    ret float %sel
+}
+
+; An 'fadd' is not too expensive, so it's ok to speculate.
+
+define float @fadd_no_sink(float %a, float %b) {
+  %add = fadd float %a, %b
+  %cmp = fcmp ogt float 6.0, %a
+  %sel = select i1 %cmp, float %add, float 7.0 
+  ret float %sel
+
+; CHECK-LABEL: @fadd_no_sink(
+; CHECK:  %sel = select i1 %cmp, float %add, float 7.0 
+}
+
+; Possible enhancement: sinkability is only calculated with the direct
+; operand of the select, so we don't try to sink this. The fdiv cost is not
+; taken into account.
+
+define float @fdiv_no_sink(float %a, float %b) {
+entry:
+  %div = fdiv float %a, %b
+  %add = fadd float %div, %b
+  %cmp = fcmp ogt float %a, 1.0
+  %sel = select i1 %cmp, float %add, float 8.0
+  ret float %sel
+
+; CHECK-LABEL: @fdiv_no_sink(
+; CHECK:  %sel = select i1 %cmp, float %add, float 8.0 
+}
+
+; Do not transform the CFG if the select operands may have side effects.
+
+declare i64* @bar(i32, i32, i32)
+declare i64* @baz(i32, i32, i32)
+
+define i64* @calls_no_sink(i32 %in) {
+  %call1 = call i64* @bar(i32 1, i32 2, i32 3)
+  %call2 = call i64* @baz(i32 1, i32 2, i32 3)
+  %tobool = icmp ne i32 %in, 0
+  %sel = select i1 %tobool, i64* %call1, i64* %call2
+  ret i64* %sel
+
+; CHECK-LABEL: @calls_no_sink(
+; CHECK:  %sel = select i1 %tobool, i64* %call1, i64* %call2
+}
+
+define i32 @sdiv_no_sink(i32 %a, i32 %b) {
+  %div1 = sdiv i32 %a, %b
+  %div2 = sdiv i32 %b, %a
+  %cmp = icmp sgt i32 %a, 5
+  %sel = select i1 %cmp, i32 %div1, i32 %div2
+  ret i32 %sel
+
+; CHECK-LABEL: @sdiv_no_sink(
+; CHECK:  %sel = select i1 %cmp, i32 %div1, i32 %div2
+}
+




More information about the llvm-commits mailing list