[llvm] r300989 - [ConstHoisting] Add BFI in constanthoisting pass and select the best insertion

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 21 15:49:24 PDT 2017


On Fri, Apr 21, 2017 at 3:41 PM, Chandler Carruth <chandlerc at gmail.com> wrote:
> On Fri, Apr 21, 2017 at 9:03 AM Wei Mi via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>>
>> Author: wmi
>> Date: Fri Apr 21 10:50:16 2017
>> New Revision: 300989
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=300989&view=rev
>> Log:
>> [ConstHoisting] Add BFI in constanthoisting pass and select the best
>> insertion
>> places based on it.
>>
>> Existing constant hoisting pass will merge a group of constants in a small
>> range
>> and hoist the const materialization code to the common dominator of their
>> uses.
>> However, if the uses are all in cold paths, existing implementation may
>> hoist
>> the materialization code from cold paths to a hot place. This may hurt
>> performance.
>> The patch introduces BFI to the pass and selects the best insertion places
>> based
>> on it.
>>
>> The change is controlled by an option consthoist-with-block-frequency
>> which is
>> off by default for now.
>
>
> Is there a plan to turn it on by default, and better yet, remove the flag?
> The compile time numbers seem to indicate this isn't very expensive and all
> things being equal it would be nice to avoid an ever growing number of
> flags...

Yes, I plan to turn it on by default soon. I can remove the flag after
the change is stable. Before that, it is easier to turn the flag off
instead of reverting the whole change when we see correctness or
performance regressions.

Thanks,
Wei.

>
>>
>>
>> Differential Revision: https://reviews.llvm.org/D28962
>>
>> Added:
>>     llvm/trunk/test/CodeGen/X86/constant-hoisting-bfi.ll
>> Modified:
>>     llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
>>     llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp
>>     llvm/trunk/test/CodeGen/X86/fold-tied-op.ll
>>     llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll
>>
>> Modified: llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h?rev=300989&r1=300988&r2=300989&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
>> (original)
>> +++ llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h Fri Apr
>> 21 10:50:16 2017
>> @@ -36,6 +36,7 @@
>>  #ifndef LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H
>>  #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H
>>
>> +#include "llvm/Analysis/BlockFrequencyInfo.h"
>>  #include "llvm/Analysis/TargetTransformInfo.h"
>>  #include "llvm/IR/Dominators.h"
>>  #include "llvm/IR/PassManager.h"
>> @@ -98,7 +99,7 @@ public:
>>
>>    // Glue for old PM.
>>    bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
>> -               BasicBlock &Entry);
>> +               BlockFrequencyInfo *BFI, BasicBlock &Entry);
>>
>>    void releaseMemory() {
>>      ConstantVec.clear();
>> @@ -112,6 +113,7 @@ private:
>>
>>    const TargetTransformInfo *TTI;
>>    DominatorTree *DT;
>> +  BlockFrequencyInfo *BFI;
>>    BasicBlock *Entry;
>>
>>    /// Keeps track of constant candidates found in the function.
>> @@ -124,8 +126,8 @@ private:
>>    SmallVector<consthoist::ConstantInfo, 8> ConstantVec;
>>
>>    Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U)
>> const;
>> -  Instruction *findConstantInsertionPoint(
>> -      const consthoist::ConstantInfo &ConstInfo) const;
>> +  SmallPtrSet<Instruction *, 8>
>> +  findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo)
>> const;
>>    void collectConstantCandidates(ConstCandMapType &ConstCandMap,
>>                                   Instruction *Inst, unsigned Idx,
>>                                   ConstantInt *ConstInt);
>>
>> Modified: llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp?rev=300989&r1=300988&r2=300989&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp Fri Apr 21
>> 10:50:16 2017
>> @@ -53,6 +53,12 @@ using namespace consthoist;
>>  STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
>>  STATISTIC(NumConstantsRebased, "Number of constants rebased");
>>
>> +static cl::opt<bool> ConstHoistWithBlockFrequency(
>> +    "consthoist-with-block-frequency", cl::init(false), cl::Hidden,
>> +    cl::desc("Enable the use of the block frequency analysis to reduce
>> the "
>> +             "chance to execute const materialization more frequently
>> than "
>> +             "without hoisting."));
>> +
>>  namespace {
>>  /// \brief The constant hoisting pass.
>>  class ConstantHoistingLegacyPass : public FunctionPass {
>> @@ -68,6 +74,8 @@ public:
>>
>>    void getAnalysisUsage(AnalysisUsage &AU) const override {
>>      AU.setPreservesCFG();
>> +    if (ConstHoistWithBlockFrequency)
>> +      AU.addRequired<BlockFrequencyInfoWrapperPass>();
>>      AU.addRequired<DominatorTreeWrapperPass>();
>>      AU.addRequired<TargetTransformInfoWrapperPass>();
>>    }
>> @@ -82,6 +90,7 @@ private:
>>  char ConstantHoistingLegacyPass::ID = 0;
>>  INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
>>                        "Constant Hoisting", false, false)
>> +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
>>  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
>>  INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
>>  INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
>> @@ -99,9 +108,13 @@ bool ConstantHoistingLegacyPass::runOnFu
>>    DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
>>    DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
>>
>> -  bool MadeChange = Impl.runImpl(
>> -      Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),
>> -      getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
>> Fn.getEntryBlock());
>> +  bool MadeChange =
>> +      Impl.runImpl(Fn,
>> getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),
>> +                   getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
>> +                   ConstHoistWithBlockFrequency
>> +                       ?
>> &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
>> +                       : nullptr,
>> +                   Fn.getEntryBlock());
>>
>>    if (MadeChange) {
>>      DEBUG(dbgs() << "********** Function after Constant Hoisting: "
>> @@ -148,33 +161,142 @@ Instruction *ConstantHoistingPass::findM
>>    return IDom->getBlock()->getTerminator();
>>  }
>>
>> +/// \brief Given \p BBs as input, find another set of BBs which
>> collectively
>> +/// dominates \p BBs and have the minimal sum of frequencies. Return the
>> BB
>> +/// set found in \p BBs.
>> +void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
>> +                          BasicBlock *Entry,
>> +                          SmallPtrSet<BasicBlock *, 8> &BBs) {
>> +  assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
>> +  // Nodes on the current path to the root.
>> +  SmallPtrSet<BasicBlock *, 8> Path;
>> +  // Candidates includes any block 'BB' in set 'BBs' that is not strictly
>> +  // dominated by any other blocks in set 'BBs', and all nodes in the
>> path
>> +  // in the dominator tree from Entry to 'BB'.
>> +  SmallPtrSet<BasicBlock *, 16> Candidates;
>> +  for (auto BB : BBs) {
>> +    Path.clear();
>> +    // Walk up the dominator tree until Entry or another BB in BBs
>> +    // is reached. Insert the nodes on the way to the Path.
>> +    BasicBlock *Node = BB;
>> +    // The "Path" is a candidate path to be added into Candidates set.
>> +    bool isCandidate = false;
>> +    do {
>> +      Path.insert(Node);
>> +      if (Node == Entry || Candidates.count(Node)) {
>> +        isCandidate = true;
>> +        break;
>> +      }
>> +      assert(DT.getNode(Node)->getIDom() &&
>> +             "Entry doens't dominate current Node");
>> +      Node = DT.getNode(Node)->getIDom()->getBlock();
>> +    } while (!BBs.count(Node));
>> +
>> +    // If isCandidate is false, Node is another Block in BBs dominating
>> +    // current 'BB'. Drop the nodes on the Path.
>> +    if (!isCandidate)
>> +      continue;
>> +
>> +    // Add nodes on the Path into Candidates.
>> +    Candidates.insert(Path.begin(), Path.end());
>> +  }
>> +
>> +  // Sort the nodes in Candidates in top-down order and save the nodes
>> +  // in Orders.
>> +  unsigned Idx = 0;
>> +  SmallVector<BasicBlock *, 16> Orders;
>> +  Orders.push_back(Entry);
>> +  while (Idx != Orders.size()) {
>> +    BasicBlock *Node = Orders[Idx++];
>> +    for (auto ChildDomNode : DT.getNode(Node)->getChildren()) {
>> +      if (Candidates.count(ChildDomNode->getBlock()))
>> +        Orders.push_back(ChildDomNode->getBlock());
>> +    }
>> +  }
>> +
>> +  // Visit Orders in bottom-up order.
>> +  typedef std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency>
>> +      InsertPtsCostPair;
>> +  // InsertPtsMap is a map from a BB to the best insertion points for the
>> +  // subtree of BB (subtree not including the BB itself).
>> +  DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;
>> +  InsertPtsMap.reserve(Orders.size() + 1);
>> +  for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
>> +    BasicBlock *Node = *RIt;
>> +    bool NodeInBBs = BBs.count(Node);
>> +    SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first;
>> +    BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;
>> +
>> +    // Return the optimal insert points in BBs.
>> +    if (Node == Entry) {
>> +      BBs.clear();
>> +      if (InsertPtsFreq > BFI.getBlockFreq(Node))
>> +        BBs.insert(Entry);
>> +      else
>> +        BBs.insert(InsertPts.begin(), InsertPts.end());
>> +      break;
>> +    }
>> +
>> +    BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();
>> +    // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every
>> child
>> +    // will update its parent's ParentInsertPts and ParentPtsFreq.
>> +    SmallPtrSet<BasicBlock *, 16> &ParentInsertPts =
>> InsertPtsMap[Parent].first;
>> +    BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
>> +    // Choose to insert in Node or in subtree of Node.
>> +    if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) {
>> +      ParentInsertPts.insert(Node);
>> +      ParentPtsFreq += BFI.getBlockFreq(Node);
>> +    } else {
>> +      ParentInsertPts.insert(InsertPts.begin(), InsertPts.end());
>> +      ParentPtsFreq += InsertPtsFreq;
>> +    }
>> +  }
>> +}
>> +
>>  /// \brief Find an insertion point that dominates all uses.
>> -Instruction *ConstantHoistingPass::findConstantInsertionPoint(
>> +SmallPtrSet<Instruction *, 8>
>> ConstantHoistingPass::findConstantInsertionPoint(
>>      const ConstantInfo &ConstInfo) const {
>>    assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info
>> entry.");
>>    // Collect all basic blocks.
>>    SmallPtrSet<BasicBlock *, 8> BBs;
>> +  SmallPtrSet<Instruction *, 8> InsertPts;
>>    for (auto const &RCI : ConstInfo.RebasedConstants)
>>      for (auto const &U : RCI.Uses)
>>        BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
>>
>> -  if (BBs.count(Entry))
>> -    return &Entry->front();
>> +  if (BBs.count(Entry)) {
>> +    InsertPts.insert(&Entry->front());
>> +    return InsertPts;
>> +  }
>> +
>> +  if (BFI) {
>> +    findBestInsertionSet(*DT, *BFI, Entry, BBs);
>> +    for (auto BB : BBs) {
>> +      BasicBlock::iterator InsertPt = BB->begin();
>> +      for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
>> +        ;
>> +      InsertPts.insert(&*InsertPt);
>> +    }
>> +    return InsertPts;
>> +  }
>>
>>    while (BBs.size() >= 2) {
>>      BasicBlock *BB, *BB1, *BB2;
>>      BB1 = *BBs.begin();
>>      BB2 = *std::next(BBs.begin());
>>      BB = DT->findNearestCommonDominator(BB1, BB2);
>> -    if (BB == Entry)
>> -      return &Entry->front();
>> +    if (BB == Entry) {
>> +      InsertPts.insert(&Entry->front());
>> +      return InsertPts;
>> +    }
>>      BBs.erase(BB1);
>>      BBs.erase(BB2);
>>      BBs.insert(BB);
>>    }
>>    assert((BBs.size() == 1) && "Expected only one element.");
>>    Instruction &FirstInst = (*BBs.begin())->front();
>> -  return findMatInsertPt(&FirstInst);
>> +  InsertPts.insert(findMatInsertPt(&FirstInst));
>> +  return InsertPts;
>>  }
>>
>>
>> @@ -557,29 +679,54 @@ bool ConstantHoistingPass::emitBaseConst
>>    bool MadeChange = false;
>>    for (auto const &ConstInfo : ConstantVec) {
>>      // Hoist and hide the base constant behind a bitcast.
>> -    Instruction *IP = findConstantInsertionPoint(ConstInfo);
>> -    IntegerType *Ty = ConstInfo.BaseConstant->getType();
>> -    Instruction *Base =
>> -      new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
>> -    DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ")
>> to BB "
>> -                 << IP->getParent()->getName() << '\n' << *Base << '\n');
>> -    NumConstantsHoisted++;
>> +    SmallPtrSet<Instruction *, 8> IPSet =
>> findConstantInsertionPoint(ConstInfo);
>> +    assert(!IPSet.empty() && "IPSet is empty");
>>
>> -    // Emit materialization code for all rebased constants.
>> -    for (auto const &RCI : ConstInfo.RebasedConstants) {
>> -      NumConstantsRebased++;
>> -      for (auto const &U : RCI.Uses)
>> -        emitBaseConstants(Base, RCI.Offset, U);
>> -    }
>> +    unsigned UsesNum = 0;
>> +    unsigned ReBasesNum = 0;
>> +    for (Instruction *IP : IPSet) {
>> +      IntegerType *Ty = ConstInfo.BaseConstant->getType();
>> +      Instruction *Base =
>> +          new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
>> +      DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant
>> +                   << ") to BB " << IP->getParent()->getName() << '\n'
>> +                   << *Base << '\n');
>> +
>> +      // Emit materialization code for all rebased constants.
>> +      unsigned Uses = 0;
>> +      for (auto const &RCI : ConstInfo.RebasedConstants) {
>> +        for (auto const &U : RCI.Uses) {
>> +          Uses++;
>> +          BasicBlock *OrigMatInsertBB =
>> +              findMatInsertPt(U.Inst, U.OpndIdx)->getParent();
>> +          // If Base constant is to be inserted in multiple places,
>> +          // generate rebase for U using the Base dominating U.
>> +          if (IPSet.size() == 1 ||
>> +              DT->dominates(Base->getParent(), OrigMatInsertBB)) {
>> +            emitBaseConstants(Base, RCI.Offset, U);
>> +            ReBasesNum++;
>> +          }
>> +        }
>> +      }
>> +      UsesNum = Uses;
>> +
>> +      // Use the same debug location as the last user of the constant.
>> +      assert(!Base->use_empty() && "The use list is empty!?");
>> +      assert(isa<Instruction>(Base->user_back()) &&
>> +             "All uses should be instructions.");
>> +
>> Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
>> +    }
>> +    (void)UsesNum;
>> +    (void)ReBasesNum;
>> +    // Expect all uses are rebased after rebase is done.
>> +    assert(UsesNum == ReBasesNum && "Not all uses are rebased");
>> +
>> +    NumConstantsHoisted++;
>>
>> -    // Use the same debug location as the last user of the constant.
>> -    assert(!Base->use_empty() && "The use list is empty!?");
>> -    assert(isa<Instruction>(Base->user_back()) &&
>> -           "All uses should be instructions.");
>> -
>> Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
>> +    // Base constant is also included in ConstInfo.RebasedConstants, so
>> +    // deduct 1 from ConstInfo.RebasedConstants.size().
>> +    NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1;
>>
>> -    // Correct for base constant, which we counted above too.
>> -    NumConstantsRebased--;
>>      MadeChange = true;
>>    }
>>    return MadeChange;
>> @@ -595,9 +742,11 @@ void ConstantHoistingPass::deleteDeadCas
>>
>>  /// \brief Optimize expensive integer constants in the given function.
>>  bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo
>> &TTI,
>> -                                   DominatorTree &DT, BasicBlock &Entry)
>> {
>> +                                   DominatorTree &DT, BlockFrequencyInfo
>> *BFI,
>> +                                   BasicBlock &Entry) {
>>    this->TTI = &TTI;
>>    this->DT = &DT;
>> +  this->BFI = BFI;
>>    this->Entry = &Entry;
>>    // Collect all constant candidates.
>>    collectConstantCandidates(Fn);
>> @@ -628,7 +777,10 @@ PreservedAnalyses ConstantHoistingPass::
>>                                              FunctionAnalysisManager &AM)
>> {
>>    auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
>>    auto &TTI = AM.getResult<TargetIRAnalysis>(F);
>> -  if (!runImpl(F, TTI, DT, F.getEntryBlock()))
>> +  auto BFI = ConstHoistWithBlockFrequency
>> +                 ? &AM.getResult<BlockFrequencyAnalysis>(F)
>> +                 : nullptr;
>> +  if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
>>      return PreservedAnalyses::all();
>>
>>    PreservedAnalyses PA;
>>
>> Added: llvm/trunk/test/CodeGen/X86/constant-hoisting-bfi.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/constant-hoisting-bfi.ll?rev=300989&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/constant-hoisting-bfi.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/constant-hoisting-bfi.ll Fri Apr 21
>> 10:50:16 2017
>> @@ -0,0 +1,115 @@
>> +; RUN: opt -consthoist -mtriple=x86_64-unknown-linux-gnu
>> -consthoist-with-block-frequency=true -S < %s | FileCheck %s
>> +
>> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>> +
>> +; Check when BFI is enabled for constant hoisting, constant 214748364701
>> +; will not be hoisted to the func entry.
>> +; CHECK-LABEL: @foo(
>> +; CHECK: entry:
>> +; CHECK-NOT: bitcast i64 214748364701 to i64
>> +; CHECK: if.then:
>> +
>> +; Function Attrs: norecurse nounwind uwtable
>> +define i64 @foo(i64* nocapture %a) {
>> +entry:
>> +  %arrayidx = getelementptr inbounds i64, i64* %a, i64 9
>> +  %t0 = load i64, i64* %arrayidx, align 8
>> +  %cmp = icmp slt i64 %t0, 564
>> +  br i1 %cmp, label %if.then, label %if.else5
>> +
>> +if.then:                                          ; preds = %entry
>> +  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 5
>> +  %t1 = load i64, i64* %arrayidx1, align 8
>> +  %cmp2 = icmp slt i64 %t1, 1009
>> +  br i1 %cmp2, label %if.then3, label %return
>> +
>> +if.then3:                                         ; preds = %if.then
>> +  %arrayidx4 = getelementptr inbounds i64, i64* %a, i64 6
>> +  %t2 = load i64, i64* %arrayidx4, align 8
>> +  %inc = add nsw i64 %t2, 1
>> +  store i64 %inc, i64* %arrayidx4, align 8
>> +  br label %return
>> +
>> +if.else5:                                         ; preds = %entry
>> +  %arrayidx6 = getelementptr inbounds i64, i64* %a, i64 6
>> +  %t3 = load i64, i64* %arrayidx6, align 8
>> +  %cmp7 = icmp slt i64 %t3, 3512
>> +  br i1 %cmp7, label %if.then8, label %return
>> +
>> +if.then8:                                         ; preds = %if.else5
>> +  %arrayidx9 = getelementptr inbounds i64, i64* %a, i64 7
>> +  %t4 = load i64, i64* %arrayidx9, align 8
>> +  %inc10 = add nsw i64 %t4, 1
>> +  store i64 %inc10, i64* %arrayidx9, align 8
>> +  br label %return
>> +
>> +return:                                           ; preds = %if.else5,
>> %if.then, %if.then8, %if.then3
>> +  %retval.0 = phi i64 [ 214748364701, %if.then3 ], [ 214748364701,
>> %if.then8 ], [ 250148364702, %if.then ], [ 256148364704, %if.else5 ]
>> +  ret i64 %retval.0
>> +}
>> +
>> +; Check when BFI is enabled for constant hoisting, constant 214748364701
>> +; in while.body will be hoisted to while.body.preheader. 214748364701 in
>> +; if.then16 and if.else10 will be merged and hoisted to the beginning of
>> +; if.else10 because if.else10 dominates if.then16.
>> +; CHECK-LABEL: @goo(
>> +; CHECK: entry:
>> +; CHECK-NOT: bitcast i64 214748364701 to i64
>> +; CHECK: while.body.preheader:
>> +; CHECK-NEXT: bitcast i64 214748364701 to i64
>> +; CHECK-NOT: bitcast i64 214748364701 to i64
>> +; CHECK: if.else10:
>> +; CHECK-NEXT: bitcast i64 214748364701 to i64
>> +; CHECK-NOT: bitcast i64 214748364701 to i64
>> +define i64 @goo(i64* nocapture %a) {
>> +entry:
>> +  %arrayidx = getelementptr inbounds i64, i64* %a, i64 9
>> +  %t0 = load i64, i64* %arrayidx, align 8
>> +  %cmp = icmp ult i64 %t0, 56
>> +  br i1 %cmp, label %if.then, label %if.else10, !prof !0
>> +
>> +if.then:                                          ; preds = %entry
>> +  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 5
>> +  %t1 = load i64, i64* %arrayidx1, align 8
>> +  %cmp2 = icmp ult i64 %t1, 10
>> +  br i1 %cmp2, label %while.cond.preheader, label %return, !prof !0
>> +
>> +while.cond.preheader:                             ; preds = %if.then
>> +  %arrayidx7 = getelementptr inbounds i64, i64* %a, i64 6
>> +  %t2 = load i64, i64* %arrayidx7, align 8
>> +  %cmp823 = icmp ugt i64 %t2, 10000
>> +  br i1 %cmp823, label %while.body.preheader, label %return
>> +
>> +while.body.preheader:                             ; preds =
>> %while.cond.preheader
>> +  br label %while.body
>> +
>> +while.body:                                       ; preds =
>> %while.body.preheader, %while.body
>> +  %t3 = phi i64 [ %add, %while.body ], [ %t2, %while.body.preheader ]
>> +  %add = add i64 %t3, 214748364701
>> +  %cmp8 = icmp ugt i64 %add, 10000
>> +  br i1 %cmp8, label %while.body, label
>> %while.cond.return.loopexit_crit_edge
>> +
>> +if.else10:                                        ; preds = %entry
>> +  %arrayidx11 = getelementptr inbounds i64, i64* %a, i64 6
>> +  %t4 = load i64, i64* %arrayidx11, align 8
>> +  %add2 = add i64 %t4, 214748364701
>> +  %cmp12 = icmp ult i64 %add2, 35
>> +  br i1 %cmp12, label %if.then16, label %return, !prof !0
>> +
>> +if.then16:                                        ; preds = %if.else10
>> +  %arrayidx17 = getelementptr inbounds i64, i64* %a, i64 7
>> +  %t5 = load i64, i64* %arrayidx17, align 8
>> +  %inc = add i64 %t5, 1
>> +  store i64 %inc, i64* %arrayidx17, align 8
>> +  br label %return
>> +
>> +while.cond.return.loopexit_crit_edge:             ; preds = %while.body
>> +  store i64 %add, i64* %arrayidx7, align 8
>> +  br label %return
>> +
>> +return:                                           ; preds =
>> %while.cond.preheader, %while.cond.return.loopexit_crit_edge, %if.else10,
>> %if.then, %if.then16
>> +  %retval.0 = phi i64 [ 214748364701, %if.then16 ], [ 0, %if.then ], [ 0,
>> %if.else10 ], [ 0, %while.cond.return.loopexit_crit_edge ], [ 0,
>> %while.cond.preheader ]
>> +  ret i64 %retval.0
>> +}
>> +
>> +!0 = !{!"branch_weights", i32 1, i32 2000}
>>
>> Modified: llvm/trunk/test/CodeGen/X86/fold-tied-op.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-tied-op.ll?rev=300989&r1=300988&r2=300989&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/fold-tied-op.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/fold-tied-op.ll Fri Apr 21 10:50:16 2017
>> @@ -7,7 +7,6 @@ target triple = "i386--netbsd"
>>
>>  ; CHECK-LABEL: fn1
>>  ; CHECK:       addl  {{.*#+}} 4-byte Folded Reload
>> -; CHECK:       addl  {{.*#+}} 4-byte Folded Reload
>>  ; CHECK:       imull {{.*#+}} 4-byte Folded Reload
>>  ; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
>>  ; CHECK:       retl
>>
>> Modified: llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll?rev=300989&r1=300988&r2=300989&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll (original)
>> +++ llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll Fri Apr 21
>> 10:50:16 2017
>> @@ -1,4 +1,5 @@
>>  ; RUN: opt -S -consthoist < %s | FileCheck %s
>> +; RUN: opt -S -consthoist -consthoist-with-block-frequency=true < %s |
>> FileCheck --check-prefix=BFIHOIST %s
>>
>>  ; FIXME: The catchpad doesn't even use the constant, so a better fix
>> would be to
>>  ; insert the bitcast in the catchpad block.
>> @@ -11,6 +12,16 @@ target triple = "x86_64-pc-windows-msvc"
>>  ; CHECK-NEXT: bitcast i64 9209618997431186100 to i64
>>  ; CHECK-NEXT: br i1 %tobool
>>
>> +; BFIHOIST-LABEL: define i32 @main
>> +; BFIHOIST: then:
>> +; BFIHOIST: %[[CONST1:.*]] = bitcast i64 9209618997431186100 to i64
>> +; BFIHOIST: %add = add i64 %call4, %[[CONST1]]
>> +; BFIHOIST: br label %endif
>> +; BFIHOIST: else:
>> +; BFIHOIST: %[[CONST2:.*]] = bitcast i64 9209618997431186100 to i64
>> +; BFIHOIST: %add6 = add i64 %call5, %[[CONST2]]
>> +; BFIHOIST: br label %endif
>> +
>>  ; Function Attrs: norecurse
>>  define i32 @main(i32 %argc, i8** nocapture readnone %argv)
>> local_unnamed_addr #0 personality i8* bitcast (i32 (...)*
>> @__CxxFrameHandler3 to i8*) {
>>    %call = tail call i64 @fn(i64 0)
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list