[llvm] r321377 - [SimplifyCFG] Don't do if-conversion if there is a long dependence chain

Fri Dec 22 11:38:41 PST 2017

Reading through your test cases, I noticed a case where I think this 
change leads to an unprofitable result.  I'm not objecting to the change 
- it overall seems reasonable - but maybe there's a way to improve here?

Specifically, this test:

+define i64 @test2(i64** %pp, i64* %p) {
+entry:
+  %0 = load i64*, i64** %pp, align 8
+  %1 = load i64, i64* %0, align 8
+  %cmp = icmp slt i64 %1, 0
+  %pint = ptrtoint i64* %p to i64
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+  %p1 = add i64 %pint, 8
+  br label %cond.end
+
+cond.false:
+  %p2 = add i64 %pint, 16
+  br label %cond.end
+
+cond.end:
+  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
+  %ptr = inttoptr i64 %p3 to i64*
+  %val = load i64, i64* %ptr, align 8
+  ret i64 %val
+
+; CHECK-LABEL: @test2
+; CHECK-NOT: select
+}

Using the select form, this can be rewritten as:

+define i64 @test2(i64** %pp, i64* %p) {
+entry:
+  %0 = load i64*, i64** %pp, align 8
+  %1 = load i64, i64* %0, align 8
+  %cmp = icmp slt i64 %1, 0
+  %pint = ptrtoint i64* %p to i64
    %cmp.ext = zext i1 %cmp to i64
    %shift = shl i64 8, %cmp.ext
    %p3 = add i64 %pint, %shift
+  %ptr = inttoptr i64 %p3 to i64*
+  %val = load i64, i64* %ptr, align 8
+  ret i64 %val
}

And then this whole sequence becomes an addressing mode on x86:

    %shift = shl i64 8, %cmp.ext
    %p3 = add i64 %pint, %shift
+  %ptr = inttoptr i64 %p3 to i64*
+  %val = load i64, i64* %ptr, align 8

Out of curiosity, did you consider trying to improve the lowering of 
select instead?  It seems like the cost model you use here would let you 
make pretty reasonable choices to convert the select back to a branch if 
needed.

Philip

On 12/22/2017 10:54 AM, Guozhi Wei via llvm-commits wrote:
> Author: carrot
> Date: Fri Dec 22 10:54:04 2017
> New Revision: 321377
>
> URL: http://llvm.org/viewvc/llvm-project?rev=321377&view=rev
> Log:
> [SimplifyCFG] Don't do if-conversion if there is a long dependence chain
>
> If, after if-conversion, most of the instructions in the new BB form a long and slow dependence chain, the result may be slower than cmp/branch even if the branch has a high miss rate. This is because the control dependence is transformed into a data dependence; a control dependence can be speculated, so with a branch the second part can execute in parallel with the first part on a modern out-of-order (OOO) processor.
>
> This patch checks for such a long dependence chain, and gives up on if-conversion if it finds one.
>
> Differential Revision: https://reviews.llvm.org/D39352
>
>
> Added:
>      llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll
> Modified:
>      llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
>      llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
>      llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
>      llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
>      llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
>
> Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=321377&r1=321376&r2=321377&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
> +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Fri Dec 22 10:54:04 2017
> @@ -646,6 +646,9 @@ public:
>     /// \brief Additional properties of an operand's values.
>     enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
>   
> +  /// \return True if target can execute instructions out of order.
> +  bool isOutOfOrder() const;
> +
>     /// \return The number of scalar or vector registers that the target has.
>     /// If 'Vectors' is true, it returns the number of vector registers. If it is
>     /// set to false, it returns the number of scalar registers.
> @@ -1018,6 +1021,7 @@ public:
>                               Type *Ty) = 0;
>     virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
>                               Type *Ty) = 0;
> +  virtual bool isOutOfOrder() const = 0;
>     virtual unsigned getNumberOfRegisters(bool Vector) = 0;
>     virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
>     virtual unsigned getMinVectorRegisterBitWidth() = 0;
> @@ -1295,6 +1299,9 @@ public:
>                       Type *Ty) override {
>       return Impl.getIntImmCost(IID, Idx, Imm, Ty);
>     }
> +  bool isOutOfOrder() const override {
> +    return Impl.isOutOfOrder();
> +  }
>     unsigned getNumberOfRegisters(bool Vector) override {
>       return Impl.getNumberOfRegisters(Vector);
>     }
>
> Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=321377&r1=321376&r2=321377&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
> +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Fri Dec 22 10:54:04 2017
> @@ -337,6 +337,8 @@ public:
>       return TTI::TCC_Free;
>     }
>   
> +  bool isOutOfOrder() const { return false; }
> +
>     unsigned getNumberOfRegisters(bool Vector) { return 8; }
>   
>     unsigned getRegisterBitWidth(bool Vector) const { return 32; }
>
> Modified: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h?rev=321377&r1=321376&r2=321377&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h Fri Dec 22 10:54:04 2017
> @@ -402,6 +402,10 @@ public:
>       return BaseT::getInstructionLatency(I);
>     }
>   
> +  bool isOutOfOrder() const {
> +    return getST()->getSchedModel().isOutOfOrder();
> +  }
> +
>     /// @}
>   
>     /// \name Vector TTI Implementations
>
> Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=321377&r1=321376&r2=321377&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Fri Dec 22 10:54:04 2017
> @@ -314,6 +314,10 @@ int TargetTransformInfo::getIntImmCost(I
>     return Cost;
>   }
>   
> +bool TargetTransformInfo::isOutOfOrder() const {
> +  return TTIImpl->isOutOfOrder();
> +}
> +
>   unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
>     return TTIImpl->getNumberOfRegisters(Vector);
>   }
>
> Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp?rev=321377&r1=321376&r2=321377&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp (original)
> +++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Fri Dec 22 10:54:04 2017
> @@ -127,6 +127,16 @@ static cl::opt<unsigned> MaxSpeculationD
>       cl::desc("Limit maximum recursion depth when calculating costs of "
>                "speculatively executed instructions"));
>   
> +static cl::opt<unsigned> DependenceChainLatency(
> +    "dependence-chain-latency", cl::Hidden, cl::init(8),
> +    cl::desc("Limit the maximum latency of dependence chain containing cmp "
> +             "for if conversion"));
> +
> +static cl::opt<unsigned> SmallBBSize(
> +    "small-bb-size", cl::Hidden, cl::init(40),
> +    cl::desc("Check dependence chain latency only in basic block smaller than "
> +             "this number"));
> +
>   STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
>   STATISTIC(NumLinearMaps,
>             "Number of switch instructions turned into linear mapping");
> @@ -395,6 +405,166 @@ static bool DominatesMergePoint(Value *V
>     return true;
>   }
>   
> +/// Estimate the code size of the specified BB.
> +static unsigned CountBBCodeSize(BasicBlock *BB,
> +                                const TargetTransformInfo &TTI) {
> +  unsigned Size = 0;
> +  for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)
> +    Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);
> +  return Size;
> +}
> +
> +/// Find out the latency of the longest dependence chain in the BB if
> +/// LongestChain is true, or the dependence chain containing the compare
> +/// instruction feeding the block's conditional branch.
> +static unsigned FindDependenceChainLatency(BasicBlock *BB,
> +                            DenseMap<Instruction *, unsigned> &Instructions,
> +                            const TargetTransformInfo &TTI,
> +                            bool LongestChain) {
> +  unsigned MaxLatency = 0;
> +
> +  BasicBlock::iterator II;
> +  for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {
> +    unsigned Latency = 0;
> +    for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {
> +      Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));
> +      if (Op && Instructions.count(Op)) {
> +        auto OpLatency = Instructions[Op];
> +        if (OpLatency > Latency)
> +          Latency = OpLatency;
> +      }
> +    }
> +    Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);
> +    Instructions[&(*II)] = Latency;
> +
> +    if (Latency > MaxLatency)
> +      MaxLatency = Latency;
> +  }
> +
> +  if (LongestChain)
> +    return MaxLatency;
> +
> +  // The length of the dependence chain containing the compare instruction is
> +  // wanted, so the terminator must be a BranchInst.
> +  assert(isa<BranchInst>(II));
> +  BranchInst* Br = cast<BranchInst>(II);
> +  Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());
> +  if (Cmp && Instructions.count(Cmp))
> +    return Instructions[Cmp];
> +  else
> +    return 0;
> +}
> +
> +/// Instructions in BB2 may depend on instructions in BB1, and instructions
> +/// in BB1 may have users in BB2. If the last (in terms of latency) such kind
> +/// of instruction in BB1 is I, then the instructions after I can be executed
> +/// in parallel with instructions in BB2.
> +/// This function returns the latency of I.
> +static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,
> +                        BasicBlock *IfBlock1, BasicBlock *IfBlock2,
> +                        DenseMap<Instruction *, unsigned> &BB1Instructions) {
> +  unsigned LastLatency = 0;
> +  SmallVector<Instruction *, 16> Worklist;
> +  BasicBlock::iterator II;
> +  for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {
> +    if (PHINode *PN = dyn_cast<PHINode>(II)) {
> +      // Look for users in BB2.
> +      bool InBBUser = false;
> +      for (User *U : PN->users()) {
> +        if (cast<Instruction>(U)->getParent() == BB2) {
> +          InBBUser = true;
> +          break;
> +        }
> +      }
> +      // No such user, we don't care about this instruction and its operands.
> +      if (!InBBUser)
> +        break;
> +    }
> +    Worklist.push_back(&(*II));
> +  }
> +
> +  while (!Worklist.empty()) {
> +    Instruction *I = Worklist.pop_back_val();
> +    for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {
> +      if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {
> +        if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)
> +          Worklist.push_back(Op);
> +        else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {
> +          if (BB1Instructions[Op] > LastLatency)
> +            LastLatency = BB1Instructions[Op];
> +        }
> +      }
> +    }
> +  }
> +
> +  return LastLatency;
> +}
> +
> +/// If after if conversion, most of the instructions in this new BB construct a
> +/// long and slow dependence chain, it may be slower than cmp/branch, even
> +/// if the branch has a high miss rate, because the control dependence is
> +/// transformed into data dependence, and control dependence can be speculated,
> +/// and thus, the second part can execute in parallel with the first part on
> +/// modern OOO processor.
> +///
> +/// To check this condition, this function finds the length of the dependence
> +/// chain in BB1 (only the part that can be executed in parallel with code after
> +/// branch in BB2) containing cmp, and if the length is longer than a threshold,
> +/// don't perform if conversion.
> +///
> +/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.
> +/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.
> +static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,
> +                             BasicBlock *IfBlock1, BasicBlock *IfBlock2,
> +                             unsigned SpeculationSize,
> +                             const TargetTransformInfo &TTI) {
> +  // Accumulated latency of each instruction in their BBs.
> +  DenseMap<Instruction *, unsigned> BB1Instructions;
> +  DenseMap<Instruction *, unsigned> BB2Instructions;
> +
> +  if (!TTI.isOutOfOrder())
> +    return false;
> +
> +  unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI)
> +                         + SpeculationSize;
> +
> +  // We check small BB only since it is more difficult to find unrelated
> +  // instructions to fill functional units in a small BB.
> +  if (NewBBSize > SmallBBSize)
> +    return false;
> +
> +  auto BB1Chain =
> +         FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);
> +  auto BB2Chain =
> +         FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);
> +
> +  // If there are many unrelated instructions in the new BB, there will be
> +  // other instructions for the processor to issue regardless of the length
> +  // of this new dependence chain.
> +  // Modern processors can issue 3 or more instructions in each cycle. But in
> +  // real world applications, an IPC of 2 is already very good for non-loop
> +  // code with small basic blocks. Higher IPC is usually found in programs with
> +  // small kernel. So IPC of 2 is more reasonable for most applications.
> +  if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)
> +    return false;
> +
> +  // We only care about part of the dependence chain in BB1 that can be
> +  // executed in parallel with BB2, so adjust the latency.
> +  BB1Chain -=
> +      LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);
> +
> +  // Correctly predicted branch instruction can skip the dependence chain in
> +  // BB1, but misprediction has a penalty, so only when the dependence chain is
> +  // longer than DependenceChainLatency, then branch is better than select.
> +  // Besides misprediction penalty, the threshold value DependenceChainLatency
> +  // also depends on branch misprediction rate, taken branch latency and cmov
> +  // latency.
> +  if (BB1Chain >= DependenceChainLatency)
> +    return true;
> +
> +  return false;
> +}
> +
>   /// Extract ConstantInt from value, looking through IntToPtr
>   /// and PointerNullValue. Return NULL if value is not a constant int.
>   static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
> @@ -2044,6 +2214,11 @@ static bool SpeculativelyExecuteBB(Branc
>     if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
>       return false;
>   
> +  // Don't do if conversion for long dependence chain.
> +  if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,
> +                              CountBBCodeSize(ThenBB, TTI), TTI))
> +    return false;
> +
>     // If we get here, we can hoist the instruction and if-convert.
>     DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
>   
> @@ -2351,6 +2526,10 @@ static bool FoldTwoEntryPHINode(PHINode
>         }
>     }
>   
> +  if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,
> +                              AggressiveInsts.size(), TTI))
> +    return false;
> +
>     DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
>                  << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");
>   
>
> Added: llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll?rev=321377&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll (added)
> +++ llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll Fri Dec 22 10:54:04 2017
> @@ -0,0 +1,231 @@
> +; RUN: opt < %s -simplifycfg -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -S | FileCheck %s
> +; Avoid if-conversion if there is a long dependence chain.
> +
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
> +
> +; The first several cases test FindLongDependenceChain returns true, so
> +; if-conversion is blocked.
> +
> +define i64 @test1(i64** %pp, i64* %p) {
> +entry:
> +  %0 = load i64*, i64** %pp, align 8
> +  %1 = load i64, i64* %0, align 8
> +  %cmp = icmp slt i64 %1, 0
> +  %pint = ptrtoint i64* %p to i64
> +  br i1 %cmp, label %cond.true, label %cond.false
> +
> +cond.true:
> +  %p1 = add i64 %pint, 8
> +  br label %cond.end
> +
> +cond.false:
> +  %p2 = or i64 %pint, 16
> +  br label %cond.end
> +
> +cond.end:
> +  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
> +  %ptr = inttoptr i64 %p3 to i64*
> +  %val = load i64, i64* %ptr, align 8
> +  ret i64 %val
> +
> +; CHECK-NOT: select
> +}
> +
> +define i64 @test2(i64** %pp, i64* %p) {
> +entry:
> +  %0 = load i64*, i64** %pp, align 8
> +  %1 = load i64, i64* %0, align 8
> +  %cmp = icmp slt i64 %1, 0
> +  %pint = ptrtoint i64* %p to i64
> +  br i1 %cmp, label %cond.true, label %cond.false
> +
> +cond.true:
> +  %p1 = add i64 %pint, 8
> +  br label %cond.end
> +
> +cond.false:
> +  %p2 = add i64 %pint, 16
> +  br label %cond.end
> +
> +cond.end:
> +  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
> +  %ptr = inttoptr i64 %p3 to i64*
> +  %val = load i64, i64* %ptr, align 8
> +  ret i64 %val
> +
> +; CHECK-LABEL: @test2
> +; CHECK-NOT: select
> +}
> +
> +; The following cases test FindLongDependenceChain returns false, so
> +; if-conversion will proceed.
> +
> +; Non trivial LatencyAdjustment.
> +define i64 @test3(i64** %pp, i64* %p) {
> +entry:
> +  %0 = load i64*, i64** %pp, align 8
> +  %1 = load i64, i64* %0, align 8
> +  %cmp = icmp slt i64 %1, 0
> +  %pint = ptrtoint i64* %p to i64
> +  br i1 %cmp, label %cond.true, label %cond.false
> +
> +cond.true:
> +  %p1 = add i64 %pint, 8
> +  br label %cond.end
> +
> +cond.false:
> +  %p2 = or i64 %pint, 16
> +  br label %cond.end
> +
> +cond.end:
> +  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
> +  %p4 = add i64 %p3, %1
> +  %ptr = inttoptr i64 %p4 to i64*
> +  %val = load i64, i64* %ptr, align 8
> +  ret i64 %val
> +
> +; CHECK-LABEL: @test3
> +; CHECK: select
> +}
> +
> +; Short dependence chain.
> +define i64 @test4(i64* %pp, i64* %p) {
> +entry:
> +  %0 = load i64, i64* %pp, align 8
> +  %cmp = icmp slt i64 %0, 0
> +  %pint = ptrtoint i64* %p to i64
> +  br i1 %cmp, label %cond.true, label %cond.false
> +
> +cond.true:
> +  %p1 = add i64 %pint, 8
> +  br label %cond.end
> +
> +cond.false:
> +  %p2 = or i64 %pint, 16
> +  br label %cond.end
> +
> +cond.end:
> +  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
> +  %ptr = inttoptr i64 %p3 to i64*
> +  %val = load i64, i64* %ptr, align 8
> +  ret i64 %val
> +
> +; CHECK-LABEL: @test4
> +; CHECK: select
> +}
> +
> +; High IPC.
> +define i64 @test5(i64** %pp, i64* %p) {
> +entry:
> +  %0 = load i64*, i64** %pp, align 8
> +  %1 = load i64, i64* %0, align 8
> +  %cmp = icmp slt i64 %1, 0
> +  %pint = ptrtoint i64* %p to i64
> +  %2 = add i64 %pint, 2
> +  %3 = add i64 %pint, 3
> +  %4 = or i64 %pint, 16
> +  %5 = and i64 %pint, 255
> +
> +  %6 = or i64 %2, 9
> +  %7 = and i64 %3, 255
> +  %8 = add i64 %4, 4
> +  %9 = add i64 %5, 5
> +
> +  %10 = add i64 %6, 2
> +  %11 = add i64 %7, 3
> +  %12 = add i64 %8, 4
> +  %13 = add i64 %9, 5
> +
> +  %14 = add i64 %10, 6
> +  %15 = add i64 %11, 7
> +  %16 = add i64 %12, 8
> +  %17 = add i64 %13, 9
> +
> +  %18 = add i64 %14, 10
> +  %19 = add i64 %15, 11
> +  %20 = add i64 %16, 12
> +  %21 = add i64 %17, 13
> +
> +  br i1 %cmp, label %cond.true, label %cond.false
> +
> +cond.true:
> +  %p1 = add i64 %pint, 8
> +  br label %cond.end
> +
> +cond.false:
> +  %p2 = or i64 %pint, 16
> +  br label %cond.end
> +
> +cond.end:
> +  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
> +  %ptr = inttoptr i64 %p3 to i64*
> +  %val = load i64, i64* %ptr, align 8
> +
> +  ret i64 %val
> +
> +; CHECK-LABEL: @test5
> +; CHECK: select
> +}
> +
> +; Large BB size.
> +define i64 @test6(i64** %pp, i64* %p) {
> +entry:
> +  %0 = load i64*, i64** %pp, align 8
> +  %1 = load i64, i64* %0, align 8
> +  %cmp = icmp slt i64 %1, 0
> +  %pint = ptrtoint i64* %p to i64
> +  br i1 %cmp, label %cond.true, label %cond.false
> +
> +cond.true:
> +  %p1 = add i64 %pint, 8
> +  br label %cond.end
> +
> +cond.false:
> +  %p2 = or i64 %pint, 16
> +  br label %cond.end
> +
> +cond.end:
> +  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
> +  %ptr = inttoptr i64 %p3 to i64*
> +  %val = load i64, i64* %ptr, align 8
> +  %2 = add i64 %pint, 2
> +  %3 = add i64 %pint, 3
> +  %4 = add i64 %2, 4
> +  %5 = add i64 %3, 5
> +  %6 = add i64 %4, 6
> +  %7 = add i64 %5, 7
> +  %8 = add i64 %6, 6
> +  %9 = add i64 %7, 7
> +  %10 = add i64 %8, 6
> +  %11 = add i64 %9, 7
> +  %12 = add i64 %10, 6
> +  %13 = add i64 %11, 7
> +  %14 = add i64 %12, 6
> +  %15 = add i64 %13, 7
> +  %16 = add i64 %14, 6
> +  %17 = add i64 %15, 7
> +  %18 = add i64 %16, 6
> +  %19 = add i64 %17, 7
> +  %20 = add i64 %18, 6
> +  %21 = add i64 %19, 7
> +  %22 = add i64 %20, 6
> +  %23 = add i64 %21, 7
> +  %24 = add i64 %22, 6
> +  %25 = add i64 %23, 7
> +  %26 = add i64 %24, 6
> +  %27 = add i64 %25, 7
> +  %28 = add i64 %26, 6
> +  %29 = add i64 %27, 7
> +  %30 = add i64 %28, 6
> +  %31 = add i64 %29, 7
> +  %32 = add i64 %30, 8
> +  %33 = add i64 %31, 9
> +  %34 = add i64 %32, %33
> +  %35 = and i64 %34, 255
> +  %res = add i64 %val, %35
> +
> +  ret i64 %res
> +
> +; CHECK-LABEL: @test6
> +; CHECK: select
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits