[llvm] 97c3ef5 - [SelectOpti][2/5] Select-to-branch base transformation

via llvm-commits llvm-commits at lists.llvm.org
Mon May 23 13:14:42 PDT 2022


Author: Sotiris Apostolakis
Date: 2022-05-23T16:11:40-04:00
New Revision: 97c3ef5c8a289ca54ca0c61c75fd00adab92b7c0

URL: https://github.com/llvm/llvm-project/commit/97c3ef5c8a289ca54ca0c61c75fd00adab92b7c0
DIFF: https://github.com/llvm/llvm-project/commit/97c3ef5c8a289ca54ca0c61c75fd00adab92b7c0.diff

LOG: [SelectOpti][2/5] Select-to-branch base transformation

This patch implements the actual transformation of selects to branches.
It includes only the base transformation without any sinking.

Depends on D120230

Reviewed By: davidxl

Differential Revision: https://reviews.llvm.org/D122259

Added: 
    llvm/test/CodeGen/X86/select-optimize.ll

Modified: 
    llvm/lib/CodeGen/SelectOptimize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index a0260128b36b4..337d825b1dcd5 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,16 +10,40 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
+#define DEBUG_TYPE "select-optimize"
+
+STATISTIC(NumSelectsConverted, "Number of selects converted");
+
 namespace {
 
 class SelectOptimize : public FunctionPass {
+  const TargetMachine *TM = nullptr;
+  const TargetSubtargetInfo *TSI;
+  const TargetLowering *TLI = nullptr;
+  const LoopInfo *LI;
+  std::unique_ptr<BlockFrequencyInfo> BFI;
+  std::unique_ptr<BranchProbabilityInfo> BPI;
+
 public:
   static char ID;
   SelectOptimize() : FunctionPass(ID) {
@@ -28,16 +52,218 @@ class SelectOptimize : public FunctionPass {
 
   bool runOnFunction(Function &F) override;
 
-  void getAnalysisUsage(AnalysisUsage &AU) const override {}
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetPassConfig>();
+    AU.addRequired<LoopInfoWrapperPass>();
+  }
+
+private:
+  // Select groups consist of consecutive select instructions with the same
+  // condition.
+  using SelectGroup = SmallVector<SelectInst *, 2>;
+  using SelectGroups = SmallVector<SelectGroup, 2>;
+
+  bool optimizeSelects(Function &F);
+  void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
+  void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
+  bool isSelectKindSupported(SelectInst *SI);
 };
 } // namespace
 
 char SelectOptimize::ID = 0;
-INITIALIZE_PASS(SelectOptimize, "select-optimize", "Optimize selects", false,
-                false)
+
+INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+                    false)
 
 FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
 
 bool SelectOptimize::runOnFunction(Function &F) {
-  llvm_unreachable("Unimplemented");
+  TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+  TSI = TM->getSubtargetImpl(F);
+  TLI = TSI->getTargetLowering();
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  BPI.reset(new BranchProbabilityInfo(F, *LI));
+  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+
+  return optimizeSelects(F);
+}
+
+bool SelectOptimize::optimizeSelects(Function &F) {
+  // Collect all the select groups.
+  SelectGroups SIGroups;
+  for (BasicBlock &BB : F) {
+    collectSelectGroups(BB, SIGroups);
+  }
+
+  // Determine which select groups are profitable to convert to branches.
+  SelectGroups ProfSIGroups;
+  // For now assume that all select groups can be profitably converted to
+  // branches.
+  for (SelectGroup &ASI : SIGroups) {
+    ProfSIGroups.push_back(ASI);
+  }
+
+  // Convert to branches the select groups that were deemed
+  // profitable-to-convert.
+  convertProfitableSIGroups(ProfSIGroups);
+
+  // Code modified if at least one select group was converted.
+  return !ProfSIGroups.empty();
+}
+
+/// If \p isTrue is true, return the true value of \p SI, otherwise return the
+/// false value of \p SI. If the true/false value of \p SI is defined by any
+/// select instructions in \p Selects, look through the defining select
+/// instruction until the true/false value is not defined in \p Selects.
+static Value *
+getTrueOrFalseValue(SelectInst *SI, bool isTrue,
+                    const SmallPtrSet<const Instruction *, 2> &Selects) {
+  Value *V = nullptr;
+  for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
+       DefSI = dyn_cast<SelectInst>(V)) {
+    assert(DefSI->getCondition() == SI->getCondition() &&
+           "The condition of DefSI does not match with SI");
+    V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+  }
+  assert(V && "Failed to get select true/false value");
+  return V;
+}
+
+void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+  for (SelectGroup &ASI : ProfSIGroups) {
+    // TODO: eliminate the redundancy of logic transforming selects to branches
+    // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
+    // selects for all cases (with and without profile information).
+
+    // Transform a sequence like this:
+    //    start:
+    //       %cmp = cmp uge i32 %a, %b
+    //       %sel = select i1 %cmp, i32 %c, i32 %d
+    //
+    // Into:
+    //    start:
+    //       %cmp = cmp uge i32 %a, %b
+    //       %cmp.frozen = freeze %cmp
+    //       br i1 %cmp.frozen, label %select.end, label %select.false
+    //    select.false:
+    //       br label %select.end
+    //    select.end:
+    //       %sel = phi i32 [ %c, %start ], [ %d, %select.false ]
+    //
+    // %cmp should be frozen, otherwise it may introduce undefined behavior.
+
+    // We split the block containing the select(s) into two blocks.
+    SelectInst *SI = ASI.front();
+    SelectInst *LastSI = ASI.back();
+    BasicBlock *StartBlock = SI->getParent();
+    BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
+    BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+    // Delete the unconditional branch that was just created by the split.
+    StartBlock->getTerminator()->eraseFromParent();
+
+    // Move any debug/pseudo instructions that were in-between the select
+    // group to the newly-created end block.
+    SmallVector<Instruction *, 2> DebugPseudoINS;
+    auto DIt = SI->getIterator();
+    while (&*DIt != LastSI) {
+      if (DIt->isDebugOrPseudoInst())
+        DebugPseudoINS.push_back(&*DIt);
+      DIt++;
+    }
+    for (auto DI : DebugPseudoINS) {
+      DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+    }
+
+    // These are the new basic blocks for the conditional branch.
+    // For now, no instruction sinking to the true/false blocks.
+    // Thus both True and False blocks will be empty.
+    BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
+
+    // Use the 'false' side for a new input value to the PHI.
+    FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+                                    EndBlock->getParent(), EndBlock);
+    auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+    FalseBranch->setDebugLoc(SI->getDebugLoc());
+
+    // For the 'true' side the path originates from the start block from the
+    // point of view of the new PHI.
+    TrueBlock = StartBlock;
+
+    // Insert the real conditional branch based on the original condition.
+    BasicBlock *TT, *FT;
+    TT = EndBlock;
+    FT = FalseBlock;
+    IRBuilder<> IB(SI);
+    auto *CondFr =
+        IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
+    IB.CreateCondBr(CondFr, TT, FT, SI);
+
+    SmallPtrSet<const Instruction *, 2> INS;
+    INS.insert(ASI.begin(), ASI.end());
+    // Use reverse iterator because later select may use the value of the
+    // earlier select, and we need to propagate value through earlier select
+    // to get the PHI operand.
+    for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
+      SelectInst *SI = *It;
+      // The select itself is replaced with a PHI Node.
+      PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+      PN->takeName(SI);
+      PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
+      PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+      PN->setDebugLoc(SI->getDebugLoc());
+
+      SI->replaceAllUsesWith(PN);
+      SI->eraseFromParent();
+      INS.erase(SI);
+      ++NumSelectsConverted;
+    }
+  }
+}
+
+void SelectOptimize::collectSelectGroups(BasicBlock &BB,
+                                         SelectGroups &SIGroups) {
+  BasicBlock::iterator BBIt = BB.begin();
+  while (BBIt != BB.end()) {
+    Instruction *I = &*BBIt++;
+    if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+      SelectGroup SIGroup;
+      SIGroup.push_back(SI);
+      while (BBIt != BB.end()) {
+        Instruction *NI = &*BBIt;
+        SelectInst *NSI = dyn_cast<SelectInst>(NI);
+        if (NSI && SI->getCondition() == NSI->getCondition()) {
+          SIGroup.push_back(NSI);
+        } else if (!NI->isDebugOrPseudoInst()) {
+          // Debug/pseudo instructions should be skipped and not prevent the
+          // formation of a select group.
+          break;
+        }
+        ++BBIt;
+      }
+
+      // If the select type is not supported, no point optimizing it.
+      // Instruction selection will take care of it.
+      if (!isSelectKindSupported(SI))
+        continue;
+
+      SIGroups.push_back(SIGroup);
+    }
+  }
+}
+
+bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+  if (VectorCond)
+    return false;
+  TargetLowering::SelectSupportKind SelectKind;
+  if (SI->getType()->isVectorTy())
+    SelectKind = TargetLowering::ScalarCondVectorVal;
+  else
+    SelectKind = TargetLowering::ScalarValSelect;
+  return TLI->isSelectSupported(SelectKind);
 }

diff  --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll
new file mode 100644
index 0000000000000..300fb4de312db
--- /dev/null
+++ b/llvm/test/CodeGen/X86/select-optimize.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s
+
+; Single select converted to branch
+define i32 @single_select(i32 %a, i32 %b, i1 %cmp) {
+; CHECK-LABEL: @single_select(
+; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
+; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       select.false:
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0
+  ret i32 %sel
+}
+
+; Select group converted to branch
+define i32 @select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) {
+; CHECK-LABEL: @select_group(
+; CHECK-NEXT:    [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
+; CHECK-NEXT:    br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]]
+; CHECK:       select.false:
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
+; CHECK-NEXT:    [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0
+  call void @llvm.dbg.value(metadata i32 %sel1, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !3)
+  %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !0
+  %add = add i32 %sel1, %sel2
+  ret i32 %add
+}
+
+; Select group with intra-group dependence converted to branch
+define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) {
+; CHECK-LABEL: @select_group_intra_group(
+; CHECK-NEXT:    [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
+; CHECK-NEXT:    br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]]
+; CHECK:       select.false:
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
+; CHECK-NEXT:    [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[B]], [[SELECT_FALSE]] ]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0
+  %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !0
+  %sub = sub i32 %sel1, %sel2
+  ret i32 %sub
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.module.flags = !{!6, !7}
+
+!0 = !{!"branch_weights", i32 1, i32 100}
+!1 = !DIFile(filename: "test.c", directory: "/test")
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !5, splitDebugInlining: false, nameTableKind: None)
+!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !2)
+!4 = !DILocalVariable(name: "x", scope: !3)
+!5 = !{}
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 1, !"Debug Info Version", i32 3}


        


More information about the llvm-commits mailing list