[llvm-commits] [llvm] r75252 - in /llvm/trunk: include/llvm/Analysis/ScalarEvolution.h include/llvm/Analysis/ScalarEvolutionExpressions.h lib/Analysis/ScalarEvolution.cpp test/Transforms/IndVarSimplify/iv-sext.ll

Nick Lewycky nlewycky at google.com
Fri Jul 10 10:05:32 PDT 2009


2009/7/10 Dan Gohman <gohman at apple.com>

> Author: djg
> Date: Fri Jul 10 11:42:52 2009
> New Revision: 75252
>
> URL: http://llvm.org/viewvc/llvm-project?rev=75252&view=rev
> Log:
> Generalize ScalarEvolution's cast-folding code to support more kinds
> of loops. Add several new functions to for working with ScalarEvolution's
> add-hoc value-range analysis functionality.


Dan, this looks great! Does this kill off LoopVR?

Why does isKnownPositive(Step) control whether the range should be
considered signed or unsigned? You're already considering it 'signed' just
by asking the question; an unsigned value is always positive :-)

The intuition here is that getSignedRange and getUnsignedRange could be
merged by checking for the SLT, SGT and ULE cases all in one implementation
which in turn leads to collapsing of cases in a lot of code. (Oh, and the
SCEVUnknown code could do both types of computation and just take the
smaller of the two resulting ranges.)

Nick


> Added:
>    llvm/trunk/test/Transforms/IndVarSimplify/iv-sext.ll
> Modified:
>    llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
>    llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpressions.h
>    llvm/trunk/lib/Analysis/ScalarEvolution.cpp
>
> Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolution.h?rev=75252&r1=75251&r2=75252&view=diff
>
>
> ==============================================================================
> --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h (original)
> +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h Fri Jul 10 11:42:52
> 2009
> @@ -26,6 +26,7 @@
>  #include "llvm/Support/DataTypes.h"
>  #include "llvm/Support/ValueHandle.h"
>  #include "llvm/Support/Allocator.h"
> +#include "llvm/Support/ConstantRange.h"
>  #include "llvm/ADT/FoldingSet.h"
>  #include "llvm/ADT/DenseMap.h"
>  #include <iosfwd>
> @@ -333,12 +334,20 @@
>     /// found.
>     BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
>
> -    /// isNecessaryCond - Test whether the given CondValue value is a
> condition
> -    /// which is at least as strict as the one described by Pred, LHS, and
> RHS.
> +    /// isNecessaryCond - Test whether the condition described by Pred,
> LHS,
> +    /// and RHS is a necessary condition for the given Cond value to
> evaluate
> +    /// to true.
>     bool isNecessaryCond(Value *Cond, ICmpInst::Predicate Pred,
>                          const SCEV *LHS, const SCEV *RHS,
>                          bool Inverse);
>
> +    /// isNecessaryCondOperands - Test whether the condition described by
> Pred,
> +    /// LHS, and RHS is a necessary condition for the condition described
> by
> +    /// Pred, FoundLHS, and FoundRHS to evaluate to true.
> +    bool isNecessaryCondOperands(ICmpInst::Predicate Pred,
> +                                 const SCEV *LHS, const SCEV *RHS,
> +                                 const SCEV *FoundLHS, const SCEV
> *FoundRHS);
> +
>     /// getConstantEvolutionLoopExitValue - If we know that the specified
> Phi is
>     /// in the header of its containing loop, we know the loop executes a
>     /// constant number of times, and the PHI node is just a recurrence
> @@ -506,10 +515,16 @@
>
>     /// isLoopGuardedByCond - Test whether entry to the loop is protected
> by
>     /// a conditional between LHS and RHS.  This is used to help avoid max
> -    /// expressions in loop trip counts.
> +    /// expressions in loop trip counts, and to eliminate casts.
>     bool isLoopGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
>                              const SCEV *LHS, const SCEV *RHS);
>
> +    /// isLoopBackedgeGuardedByCond - Test whether the backedge of the
> loop is
> +    /// protected by a conditional between LHS and RHS.  This is used to
> +    /// to eliminate casts.
> +    bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate
> Pred,
> +                                     const SCEV *LHS, const SCEV *RHS);
> +
>     /// getBackedgeTakenCount - If the specified loop has a predictable
>     /// backedge-taken count, return it, otherwise return a
> SCEVCouldNotCompute
>     /// object. The backedge-taken count is the number of times the loop
> header
> @@ -545,13 +560,42 @@
>     /// bitwidth of S.
>     uint32_t GetMinTrailingZeros(const SCEV *S);
>
> -    /// GetMinLeadingZeros - Determine the minimum number of zero bits
> that S is
> -    /// guaranteed to begin with (at every loop iteration).
> -    uint32_t GetMinLeadingZeros(const SCEV *S);
> -
> -    /// GetMinSignBits - Determine the minimum number of sign bits that S
> is
> -    /// guaranteed to begin with.
> -    uint32_t GetMinSignBits(const SCEV *S);
> +    /// getUnsignedRange - Determine the unsigned range for a particular
> SCEV.
> +    ///
> +    ConstantRange getUnsignedRange(const SCEV *S);
> +
> +    /// getSignedRange - Determine the signed range for a particular SCEV.
> +    ///
> +    ConstantRange getSignedRange(const SCEV *S);
> +
> +    /// isKnownNegative - Test if the given expression is known to be
> negative.
> +    ///
> +    bool isKnownNegative(const SCEV *S);
> +
> +    /// isKnownPositive - Test if the given expression is known to be
> positive.
> +    ///
> +    bool isKnownPositive(const SCEV *S);
> +
> +    /// isKnownNonNegative - Test if the given expression is known to be
> +    /// non-negative.
> +    ///
> +    bool isKnownNonNegative(const SCEV *S);
> +
> +    /// isKnownNonPositive - Test if the given expression is known to be
> +    /// non-positive.
> +    ///
> +    bool isKnownNonPositive(const SCEV *S);
> +
> +    /// isKnownNonZero - Test if the given expression is known to be
> +    /// non-zero.
> +    ///
> +    bool isKnownNonZero(const SCEV *S);
> +
> +    /// isKnownNonZero - Test if the given expression is known to satisfy
> +    /// the condition described by Pred, LHS, and RHS.
> +    ///
> +    bool isKnownPredicate(ICmpInst::Predicate Pred,
> +                          const SCEV *LHS, const SCEV *RHS);
>
>     virtual bool runOnFunction(Function &F);
>     virtual void releaseMemory();
>
> Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpressions.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpressions.h?rev=75252&r1=75251&r2=75252&view=diff
>
>
> ==============================================================================
> --- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpressions.h
> (original)
> +++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpressions.h Fri Jul
> 10 11:42:52 2009
> @@ -453,6 +453,12 @@
>                                                  const SCEV *Conc,
>                                                  ScalarEvolution &SE)
> const;
>
> +    /// getPostIncExpr - Return an expression representing the value of
> +    /// this expression one iteration of the loop ahead.
> +    const SCEV *getPostIncExpr(ScalarEvolution &SE) const {
> +      return SE.getAddExpr(this, getStepRecurrence(SE));
> +    }
> +
>     virtual void print(raw_ostream &OS) const;
>
>     /// Methods for support type inquiry through isa, cast, and dyn_cast:
>
> Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=75252&r1=75251&r2=75252&view=diff
>
>
> ==============================================================================
> --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
> +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Fri Jul 10 11:42:52 2009
> @@ -811,6 +811,11 @@
>   // this:  for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
>   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
>     if (AR->isAffine()) {
> +      const SCEV *Start = AR->getStart();
> +      const SCEV *Step = AR->getStepRecurrence(*this);
> +      unsigned BitWidth = getTypeSizeInBits(AR->getType());
> +      const Loop *L = AR->getLoop();
> +
>       // Check whether the backedge-taken count is SCEVCouldNotCompute.
>       // Note that this serves two purposes: It filters out loops that are
>       // simply not analyzable, and it covers the case where this code is
> @@ -819,12 +824,10 @@
>       // in infinite recursion. In the later case, the analysis code will
>       // cope with a conservative value, and it will take care to purge
>       // that value once it has finished.
> -      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
> +      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
>       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
>         // Manually compute the final value for AR, checking for
>         // overflow.
> -        const SCEV *Start = AR->getStart();
> -        const SCEV *Step = AR->getStepRecurrence(*this);
>
>         // Check whether the backedge-taken count can be losslessly casted
> to
>         // the addrec's type. The count is always unsigned.
> @@ -833,8 +836,7 @@
>         const SCEV *RecastedMaxBECount =
>           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
>         if (MaxBECount == RecastedMaxBECount) {
> -          const Type *WideTy =
> -            IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
> +          const Type *WideTy = IntegerType::get(BitWidth * 2);
>           // Check whether Start+Step*MaxBECount has no unsigned overflow.
>           const SCEV *ZMul =
>             getMulExpr(CastedMaxBECount,
> @@ -848,7 +850,7 @@
>             // Return the expression with the addrec on the outside.
>             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
>                                  getZeroExtendExpr(Step, Ty),
> -                                 AR->getLoop());
> +                                 L);
>
>           // Similar to above, only this time treat the step value as
> signed.
>           // This covers loops that count down.
> @@ -864,7 +866,35 @@
>             // Return the expression with the addrec on the outside.
>             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
>                                  getSignExtendExpr(Step, Ty),
> -                                 AR->getLoop());
> +                                 L);
> +        }
> +
> +        // If the backedge is guarded by a comparison with the pre-inc
> value
> +        // the addrec is safe. Also, if the entry is guarded by a
> comparison
> +        // with the start value and the backedge is guarded by a
> comparison
> +        // with the post-inc value, the addrec is safe.
> +        if (isKnownPositive(Step)) {
> +          const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
> +
>  getUnsignedRange(Step).getUnsignedMax());
> +          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
> +              (isLoopGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
> +               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
> +                                           AR->getPostIncExpr(*this), N)))
> +            // Return the expression with the addrec on the outside.
> +            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
> +                                 getZeroExtendExpr(Step, Ty),
> +                                 L);
> +        } else if (isKnownNegative(Step)) {
> +          const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
> +
>  getSignedRange(Step).getSignedMin());
> +          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) &&
> +              (isLoopGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) ||
> +               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
> +                                           AR->getPostIncExpr(*this), N)))
> +            // Return the expression with the addrec on the outside.
> +            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
> +                                 getSignExtendExpr(Step, Ty),
> +                                 L);
>         }
>       }
>     }
> @@ -907,6 +937,11 @@
>   // this:  for (signed char X = 0; X < 100; ++X) { int Y = X; }
>   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
>     if (AR->isAffine()) {
> +      const SCEV *Start = AR->getStart();
> +      const SCEV *Step = AR->getStepRecurrence(*this);
> +      unsigned BitWidth = getTypeSizeInBits(AR->getType());
> +      const Loop *L = AR->getLoop();
> +
>       // Check whether the backedge-taken count is SCEVCouldNotCompute.
>       // Note that this serves two purposes: It filters out loops that are
>       // simply not analyzable, and it covers the case where this code is
> @@ -915,12 +950,10 @@
>       // in infinite recursion. In the later case, the analysis code will
>       // cope with a conservative value, and it will take care to purge
>       // that value once it has finished.
> -      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
> +      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
>       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
>         // Manually compute the final value for AR, checking for
>         // overflow.
> -        const SCEV *Start = AR->getStart();
> -        const SCEV *Step = AR->getStepRecurrence(*this);
>
>         // Check whether the backedge-taken count can be losslessly casted
> to
>         // the addrec's type. The count is always unsigned.
> @@ -929,8 +962,7 @@
>         const SCEV *RecastedMaxBECount =
>           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
>         if (MaxBECount == RecastedMaxBECount) {
> -          const Type *WideTy =
> -            IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
> +          const Type *WideTy = IntegerType::get(BitWidth * 2);
>           // Check whether Start+Step*MaxBECount has no signed overflow.
>           const SCEV *SMul =
>             getMulExpr(CastedMaxBECount,
> @@ -944,7 +976,35 @@
>             // Return the expression with the addrec on the outside.
>             return getAddRecExpr(getSignExtendExpr(Start, Ty),
>                                  getSignExtendExpr(Step, Ty),
> -                                 AR->getLoop());
> +                                 L);
> +        }
> +
> +        // If the backedge is guarded by a comparison with the pre-inc
> value
> +        // the addrec is safe. Also, if the entry is guarded by a
> comparison
> +        // with the start value and the backedge is guarded by a
> comparison
> +        // with the post-inc value, the addrec is safe.
> +        if (isKnownPositive(Step)) {
> +          const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
> +
>  getSignedRange(Step).getSignedMax());
> +          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
> +              (isLoopGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
> +               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
> +                                           AR->getPostIncExpr(*this), N)))
> +            // Return the expression with the addrec on the outside.
> +            return getAddRecExpr(getSignExtendExpr(Start, Ty),
> +                                 getSignExtendExpr(Step, Ty),
> +                                 L);
> +        } else if (isKnownNegative(Step)) {
> +          const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
> +
>  getSignedRange(Step).getSignedMin());
> +          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
> +              (isLoopGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
> +               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
> +                                           AR->getPostIncExpr(*this), N)))
> +            // Return the expression with the addrec on the outside.
> +            return getAddRecExpr(getSignExtendExpr(Start, Ty),
> +                                 getSignExtendExpr(Step, Ty),
> +                                 L);
>         }
>       }
>     }
> @@ -2391,19 +2451,16 @@
>       const StructLayout &SL = *TD->getStructLayout(STy);
>       unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
>       uint64_t Offset = SL.getElementOffset(FieldNo);
> -      TotalOffset = getAddExpr(TotalOffset,
> -                                  getIntegerSCEV(Offset, IntPtrTy));
> +      TotalOffset = getAddExpr(TotalOffset, getIntegerSCEV(Offset,
> IntPtrTy));
>     } else {
>       // For an array, add the element offset, explicitly scaled.
>       const SCEV *LocalOffset = getSCEV(Index);
>       if (!isa<PointerType>(LocalOffset->getType()))
>         // Getelementptr indicies are signed.
> -        LocalOffset = getTruncateOrSignExtend(LocalOffset,
> -                                              IntPtrTy);
> +        LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
>       LocalOffset =
>         getMulExpr(LocalOffset,
> -                   getIntegerSCEV(TD->getTypeAllocSize(*GTI),
> -                                  IntPtrTy));
> +                   getIntegerSCEV(TD->getTypeAllocSize(*GTI), IntPtrTy));
>       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
>     }
>   }
> @@ -2491,18 +2548,95 @@
>   return 0;
>  }
>
> -uint32_t
> -ScalarEvolution::GetMinLeadingZeros(const SCEV *S) {
> -  // TODO: Handle other SCEV expression types here.
> +/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
> +///
> +ConstantRange
> +ScalarEvolution::getUnsignedRange(const SCEV *S) {
>
>   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
> -    return C->getValue()->getValue().countLeadingZeros();
> +    return ConstantRange(C->getValue()->getValue());
> +
> +  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
> +    ConstantRange X = getUnsignedRange(Add->getOperand(0));
> +    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
> +      X = X.add(getUnsignedRange(Add->getOperand(i)));
> +    return X;
> +  }
> +
> +  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
> +    ConstantRange X = getUnsignedRange(Mul->getOperand(0));
> +    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
> +      X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
> +    return X;
> +  }
> +
> +  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
> +    ConstantRange X = getUnsignedRange(SMax->getOperand(0));
> +    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
> +      X = X.smax(getUnsignedRange(SMax->getOperand(i)));
> +    return X;
> +  }
> +
> +  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
> +    ConstantRange X = getUnsignedRange(UMax->getOperand(0));
> +    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
> +      X = X.umax(getUnsignedRange(UMax->getOperand(i)));
> +    return X;
> +  }
>
> -  if (const SCEVZeroExtendExpr *C = dyn_cast<SCEVZeroExtendExpr>(S)) {
> -    // A zero-extension cast adds zero bits.
> -    return GetMinLeadingZeros(C->getOperand()) +
> -           (getTypeSizeInBits(C->getType()) -
> -            getTypeSizeInBits(C->getOperand()->getType()));
> +  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
> +    ConstantRange X = getUnsignedRange(UDiv->getLHS());
> +    ConstantRange Y = getUnsignedRange(UDiv->getRHS());
> +    return X.udiv(Y);
> +  }
> +
> +  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
> +    ConstantRange X = getUnsignedRange(ZExt->getOperand());
> +    return
> X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
> +  }
> +
> +  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
> +    ConstantRange X = getUnsignedRange(SExt->getOperand());
> +    return
> X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
> +  }
> +
> +  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
> +    ConstantRange X = getUnsignedRange(Trunc->getOperand());
> +    return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
> +  }
> +
> +  ConstantRange FullSet(getTypeSizeInBits(S->getType()), true);
> +
> +  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
> +    const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
> +    const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
> +    if (!Trip) return FullSet;
> +
> +    // TODO: non-affine addrec
> +    if (AddRec->isAffine()) {
> +      const Type *Ty = AddRec->getType();
> +      const SCEV *MaxBECount =
> getMaxBackedgeTakenCount(AddRec->getLoop());
> +      if (getTypeSizeInBits(MaxBECount->getType()) <=
> getTypeSizeInBits(Ty)) {
> +        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
> +
> +        const SCEV *Start = AddRec->getStart();
> +        const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
> +
> +        // Check for overflow.
> +        if (!isKnownPredicate(ICmpInst::ICMP_ULE, Start, End))
> +          return FullSet;
> +
> +        ConstantRange StartRange = getUnsignedRange(Start);
> +        ConstantRange EndRange = getUnsignedRange(End);
> +        APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
> +                                   EndRange.getUnsignedMin());
> +        APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
> +                                   EndRange.getUnsignedMax());
> +        if (Min.isMinValue() && Max.isMaxValue())
> +          return ConstantRange(Min.getBitWidth(), /*isFullSet=*/true);
> +        return ConstantRange(Min, Max+1);
> +      }
> +    }
>   }
>
>   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
> @@ -2511,67 +2645,119 @@
>     APInt Mask = APInt::getAllOnesValue(BitWidth);
>     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
>     ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
> -    return Zeros.countLeadingOnes();
> +    return ConstantRange(Ones, ~Zeros);
>   }
>
> -  return 1;
> +  return FullSet;
>  }
>
> -uint32_t
> -ScalarEvolution::GetMinSignBits(const SCEV *S) {
> -  // TODO: Handle other SCEV expression types here.
> +/// getSignedRange - Determine the signed range for a particular SCEV.
> +///
> +ConstantRange
> +ScalarEvolution::getSignedRange(const SCEV *S) {
> +
> +  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
> +    return ConstantRange(C->getValue()->getValue());
>
> -  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
> -    const APInt &A = C->getValue()->getValue();
> -    return A.isNegative() ? A.countLeadingOnes() :
> -                            A.countLeadingZeros();
> +  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
> +    ConstantRange X = getSignedRange(Add->getOperand(0));
> +    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
> +      X = X.add(getSignedRange(Add->getOperand(i)));
> +    return X;
>   }
>
> -  if (const SCEVSignExtendExpr *C = dyn_cast<SCEVSignExtendExpr>(S)) {
> -    // A sign-extension cast adds sign bits.
> -    return GetMinSignBits(C->getOperand()) +
> -           (getTypeSizeInBits(C->getType()) -
> -            getTypeSizeInBits(C->getOperand()->getType()));
> +  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
> +    ConstantRange X = getSignedRange(Mul->getOperand(0));
> +    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
> +      X = X.multiply(getSignedRange(Mul->getOperand(i)));
> +    return X;
>   }
>
> -  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
> -    unsigned BitWidth = getTypeSizeInBits(A->getType());
> +  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
> +    ConstantRange X = getSignedRange(SMax->getOperand(0));
> +    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
> +      X = X.smax(getSignedRange(SMax->getOperand(i)));
> +    return X;
> +  }
> +
> +  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
> +    ConstantRange X = getSignedRange(UMax->getOperand(0));
> +    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
> +      X = X.umax(getSignedRange(UMax->getOperand(i)));
> +    return X;
> +  }
> +
> +  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
> +    ConstantRange X = getSignedRange(UDiv->getLHS());
> +    ConstantRange Y = getSignedRange(UDiv->getRHS());
> +    return X.udiv(Y);
> +  }
> +
> +  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
> +    ConstantRange X = getSignedRange(ZExt->getOperand());
> +    return
> X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
> +  }
>
> -    // Special case decrementing a value (ADD X, -1):
> -    if (const SCEVConstant *CRHS =
> dyn_cast<SCEVConstant>(A->getOperand(0)))
> -      if (CRHS->isAllOnesValue()) {
> -        SmallVector<const SCEV *, 4> OtherOps(A->op_begin() + 1,
> A->op_end());
> -        const SCEV *OtherOpsAdd = getAddExpr(OtherOps);
> -        unsigned LZ = GetMinLeadingZeros(OtherOpsAdd);
> -
> -        // If the input is known to be 0 or 1, the output is 0/-1, which
> is all
> -        // sign bits set.
> -        if (LZ == BitWidth - 1)
> -          return BitWidth;
> -
> -        // If we are subtracting one from a positive number, there is no
> carry
> -        // out of the result.
> -        if (LZ > 0)
> -          return GetMinSignBits(OtherOpsAdd);
> -      }
> -
> -    // Add can have at most one carry bit.  Thus we know that the output
> -    // is, at worst, one more bit than the inputs.
> -    unsigned Min = BitWidth;
> -    for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
> -      unsigned N = GetMinSignBits(A->getOperand(i));
> -      Min = std::min(Min, N) - 1;
> -      if (Min == 0) return 1;
> +  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
> +    ConstantRange X = getSignedRange(SExt->getOperand());
> +    return
> X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
> +  }
> +
> +  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
> +    ConstantRange X = getSignedRange(Trunc->getOperand());
> +    return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
> +  }
> +
> +  ConstantRange FullSet(getTypeSizeInBits(S->getType()), true);
> +
> +  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
> +    const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
> +    const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
> +    if (!Trip) return FullSet;
> +
> +    // TODO: non-affine addrec
> +    if (AddRec->isAffine()) {
> +      const Type *Ty = AddRec->getType();
> +      const SCEV *MaxBECount =
> getMaxBackedgeTakenCount(AddRec->getLoop());
> +      if (getTypeSizeInBits(MaxBECount->getType()) <=
> getTypeSizeInBits(Ty)) {
> +        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
> +
> +        const SCEV *Start = AddRec->getStart();
> +        const SCEV *Step = AddRec->getStepRecurrence(*this);
> +        const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
> +
> +        // Check for overflow.
> +        if (!(isKnownPositive(Step) &&
> +              isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) &&
> +            !(isKnownNegative(Step) &&
> +              isKnownPredicate(ICmpInst::ICMP_SGT, Start, End)))
> +          return FullSet;
> +
> +        ConstantRange StartRange = getSignedRange(Start);
> +        ConstantRange EndRange = getSignedRange(End);
> +        APInt Min = APIntOps::smin(StartRange.getSignedMin(),
> +                                   EndRange.getSignedMin());
> +        APInt Max = APIntOps::smax(StartRange.getSignedMax(),
> +                                   EndRange.getSignedMax());
> +        if (Min.isMinSignedValue() && Max.isMaxSignedValue())
> +          return ConstantRange(Min.getBitWidth(), /*isFullSet=*/true);
> +        return ConstantRange(Min, Max+1);
> +      }
>     }
> -    return 1;
>   }
>
>   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
>     // For a SCEVUnknown, ask ValueTracking.
> -    return ComputeNumSignBits(U->getValue(), TD);
> +    unsigned BitWidth = getTypeSizeInBits(U->getType());
> +    unsigned NS = ComputeNumSignBits(U->getValue(), TD);
> +    if (NS == 1)
> +      return FullSet;
> +    return
> +      ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
> +                    APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1);
>   }
>
> -  return 1;
> +  return FullSet;
>  }
>
>  /// createSCEV - We know that there is no SCEV for the specified value.
> @@ -3651,7 +3837,7 @@
>             if (!isSCEVable(Op->getType()))
>               return V;
>
> -            const SCEV *OpV = getSCEVAtScope(getSCEV(Op), L);
> +            const SCEV* OpV = getSCEVAtScope(Op, L);
>             if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) {
>               Constant *C = SC->getValue();
>               if (C->getType() != Op->getType())
> @@ -4052,12 +4238,176 @@
>   return false;
>  }
>
> -/// isLoopGuardedByCond - Test whether entry to the loop is protected by
> -/// a conditional between LHS and RHS.  This is used to help avoid max
> -/// expressions in loop trip counts.
> -bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
> -                                          ICmpInst::Predicate Pred,
> -                                          const SCEV *LHS, const SCEV
> *RHS) {
> +bool ScalarEvolution::isKnownNegative(const SCEV *S) {
> +  return getSignedRange(S).getSignedMax().isNegative();
> +}
> +
> +bool ScalarEvolution::isKnownPositive(const SCEV *S) {
> +  return getSignedRange(S).getSignedMin().isStrictlyPositive();
> +}
> +
> +bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
> +  return !getSignedRange(S).getSignedMin().isNegative();
> +}
> +
> +bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
> +  return !getSignedRange(S).getSignedMax().isStrictlyPositive();
> +}
> +
> +bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
> +  return isKnownNegative(S) || isKnownPositive(S);
> +}
> +
> +bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
> +                                       const SCEV *LHS, const SCEV *RHS) {
> +
> +  if (HasSameValue(LHS, RHS))
> +    return ICmpInst::isTrueWhenEqual(Pred);
> +
> +  switch (Pred) {
> +  default:
> +    assert(0 && "Unexpected ICmpInst::Predicate value!");
> +    break;
> +  case ICmpInst::ICMP_SGT:
> +    Pred = ICmpInst::ICMP_SLT;
> +    std::swap(LHS, RHS);
> +  case ICmpInst::ICMP_SLT: {
> +    ConstantRange LHSRange = getSignedRange(LHS);
> +    ConstantRange RHSRange = getSignedRange(RHS);
> +    if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
> +      return true;
> +    if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
> +      return false;
> +
> +    const SCEV *Diff = getMinusSCEV(LHS, RHS);
> +    ConstantRange DiffRange = getUnsignedRange(Diff);
> +    if (isKnownNegative(Diff)) {
> +      if (DiffRange.getUnsignedMax().ult(LHSRange.getUnsignedMin()))
> +        return true;
> +      if (DiffRange.getUnsignedMin().uge(LHSRange.getUnsignedMax()))
> +        return false;
> +    } else if (isKnownPositive(Diff)) {
> +      if (LHSRange.getUnsignedMax().ult(DiffRange.getUnsignedMin()))
> +        return true;
> +      if (LHSRange.getUnsignedMin().uge(DiffRange.getUnsignedMax()))
> +        return false;
> +    }
> +    break;
> +  }
> +  case ICmpInst::ICMP_SGE:
> +    Pred = ICmpInst::ICMP_SLE;
> +    std::swap(LHS, RHS);
> +  case ICmpInst::ICMP_SLE: {
> +    ConstantRange LHSRange = getSignedRange(LHS);
> +    ConstantRange RHSRange = getSignedRange(RHS);
> +    if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
> +      return true;
> +    if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
> +      return false;
> +
> +    const SCEV *Diff = getMinusSCEV(LHS, RHS);
> +    ConstantRange DiffRange = getUnsignedRange(Diff);
> +    if (isKnownNonPositive(Diff)) {
> +      if (DiffRange.getUnsignedMax().ule(LHSRange.getUnsignedMin()))
> +        return true;
> +      if (DiffRange.getUnsignedMin().ugt(LHSRange.getUnsignedMax()))
> +        return false;
> +    } else if (isKnownNonNegative(Diff)) {
> +      if (LHSRange.getUnsignedMax().ule(DiffRange.getUnsignedMin()))
> +        return true;
> +      if (LHSRange.getUnsignedMin().ugt(DiffRange.getUnsignedMax()))
> +        return false;
> +    }
> +    break;
> +  }
> +  case ICmpInst::ICMP_UGT:
> +    Pred = ICmpInst::ICMP_ULT;
> +    std::swap(LHS, RHS);
> +  case ICmpInst::ICMP_ULT: {
> +    ConstantRange LHSRange = getUnsignedRange(LHS);
> +    ConstantRange RHSRange = getUnsignedRange(RHS);
> +    if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
> +      return true;
> +    if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
> +      return false;
> +
> +    const SCEV *Diff = getMinusSCEV(LHS, RHS);
> +    ConstantRange DiffRange = getUnsignedRange(Diff);
> +    if (LHSRange.getUnsignedMax().ult(DiffRange.getUnsignedMin()))
> +      return true;
> +    if (LHSRange.getUnsignedMin().uge(DiffRange.getUnsignedMax()))
> +      return false;
> +    break;
> +  }
> +  case ICmpInst::ICMP_UGE:
> +    Pred = ICmpInst::ICMP_ULE;
> +    std::swap(LHS, RHS);
> +  case ICmpInst::ICMP_ULE: {
> +    ConstantRange LHSRange = getUnsignedRange(LHS);
> +    ConstantRange RHSRange = getUnsignedRange(RHS);
> +    if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
> +      return true;
> +    if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
> +      return false;
> +
> +    const SCEV *Diff = getMinusSCEV(LHS, RHS);
> +    ConstantRange DiffRange = getUnsignedRange(Diff);
> +    if (LHSRange.getUnsignedMax().ule(DiffRange.getUnsignedMin()))
> +      return true;
> +    if (LHSRange.getUnsignedMin().ugt(DiffRange.getUnsignedMax()))
> +      return false;
> +    break;
> +  }
> +  case ICmpInst::ICMP_NE: {
> +    if
> (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
> +      return true;
> +    if
> (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
> +      return true;
> +
> +    const SCEV *Diff = getMinusSCEV(LHS, RHS);
> +    if (isKnownNonZero(Diff))
> +      return true;
> +    break;
> +  }
> +  case ICmpInst::ICMP_EQ:
> +    break;
> +  }
> +  return false;
> +}
> +
> +/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
> +/// protected by a conditional between LHS and RHS.  This is used to
> +/// to eliminate casts.
> +bool
> +ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
> +                                             ICmpInst::Predicate Pred,
> +                                             const SCEV *LHS, const SCEV
> *RHS) {
> +  // Interpret a null as meaning no loop, where there is obviously no
> guard
> +  // (interprocedural conditions notwithstanding).
> +  if (!L) return true;
> +
> +  BasicBlock *Latch = L->getLoopLatch();
> +  if (!Latch)
> +    return false;
> +
> +  BranchInst *LoopContinuePredicate =
> +    dyn_cast<BranchInst>(Latch->getTerminator());
> +  if (!LoopContinuePredicate ||
> +      LoopContinuePredicate->isUnconditional())
> +    return false;
> +
> +  return
> +    isNecessaryCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS,
> +                    LoopContinuePredicate->getSuccessor(0) !=
> L->getHeader());
> +}
> +
> +/// isLoopGuardedByCond - Test whether entry to the loop is protected
> +/// by a conditional between LHS and RHS.  This is used to help avoid max
> +/// expressions in loop trip counts, and to eliminate casts.
> +bool
> +ScalarEvolution::isLoopGuardedByCond(const Loop *L,
> +                                     ICmpInst::Predicate Pred,
> +                                     const SCEV *LHS, const SCEV *RHS) {
>   // Interpret a null as meaning no loop, where there is obviously no guard
>   // (interprocedural conditions notwithstanding).
>   if (!L) return false;
> @@ -4086,8 +4436,9 @@
>   return false;
>  }
>
> -/// isNecessaryCond - Test whether the given CondValue value is a
> condition
> -/// which is at least as strict as the one described by Pred, LHS, and
> RHS.
> +/// isNecessaryCond - Test whether the condition described by Pred, LHS,
> +/// and RHS is a necessary condition for the given Cond value to evaluate
> +/// to true.
>  bool ScalarEvolution::isNecessaryCond(Value *CondValue,
>                                       ICmpInst::Predicate Pred,
>                                       const SCEV *LHS, const SCEV *RHS,
> @@ -4112,30 +4463,35 @@
>   // see if it is the comparison we are looking for.
>   Value *PreCondLHS = ICI->getOperand(0);
>   Value *PreCondRHS = ICI->getOperand(1);
> -  ICmpInst::Predicate Cond;
> +  ICmpInst::Predicate FoundPred;
>   if (Inverse)
> -    Cond = ICI->getInversePredicate();
> +    FoundPred = ICI->getInversePredicate();
>   else
> -    Cond = ICI->getPredicate();
> +    FoundPred = ICI->getPredicate();
>
> -  if (Cond == Pred)
> +  if (FoundPred == Pred)
>     ; // An exact match.
> -  else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
> -    ; // The actual condition is beyond sufficient.
> -  else
> +  else if (!ICmpInst::isTrueWhenEqual(FoundPred) && Pred ==
> ICmpInst::ICMP_NE) {
> +    // The actual condition is beyond sufficient.
> +    FoundPred = ICmpInst::ICMP_NE;
> +    // NE is symmetric but the original comparison may not be. Swap
> +    // the operands if necessary so that they match below.
> +    if (isa<SCEVConstant>(LHS))
> +      std::swap(PreCondLHS, PreCondRHS);
> +  } else
>     // Check a few special cases.
> -    switch (Cond) {
> +    switch (FoundPred) {
>     case ICmpInst::ICMP_UGT:
>       if (Pred == ICmpInst::ICMP_ULT) {
>         std::swap(PreCondLHS, PreCondRHS);
> -        Cond = ICmpInst::ICMP_ULT;
> +        FoundPred = ICmpInst::ICMP_ULT;
>         break;
>       }
>       return false;
>     case ICmpInst::ICMP_SGT:
>       if (Pred == ICmpInst::ICMP_SLT) {
>         std::swap(PreCondLHS, PreCondRHS);
> -        Cond = ICmpInst::ICMP_SLT;
> +        FoundPred = ICmpInst::ICMP_SLT;
>         break;
>       }
>       return false;
> @@ -4144,8 +4500,8 @@
>       // so check for this case by checking if the NE is comparing against
>       // a minimum or maximum constant.
>       if (!ICmpInst::isTrueWhenEqual(Pred))
> -        if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
> -          const APInt &A = CI->getValue();
> +        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(RHS)) {
> +          const APInt &A = C->getValue()->getValue();
>           switch (Pred) {
>           case ICmpInst::ICMP_SLT:
>             if (A.isMaxSignedValue()) break;
> @@ -4162,7 +4518,7 @@
>           default:
>             return false;
>           }
> -          Cond = ICmpInst::ICMP_NE;
> +          FoundPred = Pred;
>           // NE is symmetric but the original comparison may not be. Swap
>           // the operands if necessary so that they match below.
>           if (isa<SCEVConstant>(LHS))
> @@ -4175,14 +4531,70 @@
>       return false;
>     }
>
> -  if (!PreCondLHS->getType()->isInteger()) return false;
> +  assert(Pred == FoundPred && "Conditions were not reconciled!");
> +
> +  const SCEV *FoundLHS = getSCEV(PreCondLHS);
> +  const SCEV *FoundRHS = getSCEV(PreCondRHS);
> +
> +  // Balance the types.
> +  if (getTypeSizeInBits(LHS->getType()) >
> +      getTypeSizeInBits(FoundLHS->getType())) {
> +    if (CmpInst::isSigned(Pred)) {
> +      FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
> +      FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
> +    } else {
> +      FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
> +      FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
> +    }
> +  } else if (getTypeSizeInBits(LHS->getType()) <
> +             getTypeSizeInBits(FoundLHS->getType())) {
> +    // TODO: Cast LHS and RHS to FoundLHS' type. Currently this can
> +    // result in infinite recursion since the code to construct
> +    // cast expressions may want to know things about the loop
> +    // iteration in order to do simplifications.
> +    return false;
> +  }
> +
> +  return isNecessaryCondOperands(Pred, LHS, RHS,
> +                                 FoundLHS, FoundRHS) ||
> +         // ~x < ~y --> x > y
> +         isNecessaryCondOperands(Pred, LHS, RHS,
> +                                 getNotSCEV(FoundRHS),
> getNotSCEV(FoundLHS));
> +}
> +
> +/// isNecessaryCondOperands - Test whether the condition described by
> Pred,
> +/// LHS, and RHS is a necessary condition for the condition described by
> +/// Pred, FoundLHS, and FoundRHS to evaluate to true.
> +bool
> +ScalarEvolution::isNecessaryCondOperands(ICmpInst::Predicate Pred,
> +                                         const SCEV *LHS, const SCEV *RHS,
> +                                         const SCEV *FoundLHS,
> +                                         const SCEV *FoundRHS) {
> +  switch (Pred) {
> +  default: break;
> +  case ICmpInst::ICMP_SLT:
> +    if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
> +        isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS))
> +      return true;
> +    break;
> +  case ICmpInst::ICMP_SGT:
> +    if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
> +        isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS))
> +      return true;
> +    break;
> +  case ICmpInst::ICMP_ULT:
> +    if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
> +        isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS))
> +      return true;
> +    break;
> +  case ICmpInst::ICMP_UGT:
> +    if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
> +        isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS))
> +      return true;
> +    break;
> +  }
>
> -  const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
> -  const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
> -  return (HasSameValue(LHS, PreCondLHSSCEV) &&
> -          HasSameValue(RHS, PreCondRHSSCEV)) ||
> -         (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
> -          HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
> +  return false;
>  }
>
>  /// getBECount - Subtract the end and start values and divide by the step,
> @@ -4267,9 +4679,9 @@
>     const SCEV *Start = AddRec->getOperand(0);
>
>     // Determine the minimum constant start value.
> -    const SCEV *MinStart = isa<SCEVConstant>(Start) ? Start :
> -      getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) :
> -                             APInt::getMinValue(BitWidth));
> +    const SCEV *MinStart = getConstant(isSigned ?
> +      getSignedRange(Start).getSignedMin() :
> +      getUnsignedRange(Start).getUnsignedMin());
>
>     // If we know that the condition is true in order to enter the loop,
>     // then we know that it will run exactly (m-n)/s times. Otherwise, we
> @@ -4277,18 +4689,16 @@
>     // the division must round up.
>     const SCEV *End = RHS;
>     if (!isLoopGuardedByCond(L,
> -                             isSigned ? ICmpInst::ICMP_SLT :
> ICmpInst::ICMP_ULT,
> +                             isSigned ? ICmpInst::ICMP_SLT :
> +                                        ICmpInst::ICMP_ULT,
>                              getMinusSCEV(Start, Step), RHS))
>       End = isSigned ? getSMaxExpr(RHS, Start)
>                      : getUMaxExpr(RHS, Start);
>
>     // Determine the maximum constant end value.
> -    const SCEV *MaxEnd =
> -      isa<SCEVConstant>(End) ? End :
> -      getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth)
> -                               .ashr(GetMinSignBits(End) - 1) :
> -                             APInt::getMaxValue(BitWidth)
> -                               .lshr(GetMinLeadingZeros(End)));
> +    const SCEV *MaxEnd = getConstant(isSigned ?
> +      getSignedRange(End).getSignedMax() :
> +      getUnsignedRange(End).getUnsignedMax());
>
>     // Finally, we subtract these two values and divide, rounding up, to
> get
>     // the number of times the backedge is executed.
>
> Added: llvm/trunk/test/Transforms/IndVarSimplify/iv-sext.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/iv-sext.ll?rev=75252&view=auto
>
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/IndVarSimplify/iv-sext.ll (added)
> +++ llvm/trunk/test/Transforms/IndVarSimplify/iv-sext.ll Fri Jul 10
> 11:42:52 2009
> @@ -0,0 +1,143 @@
> +; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
> +; RUN: grep {= sext} %t | count 4
> +; RUN: grep {phi i64} %t | count 2
> +
> +; Indvars should be able to promote the hiPart induction variable in the
> +; inner loop to i64.
> +; TODO: it should promote hiPart to i64 in the outer loop too.
> +
> +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
> +
> +define void @t(float* %pTmp1, float* %peakWeight, float*
> %nrgReducePeakrate, i32 %bandEdgeIndex, float %tmp1) nounwind {
> +entry:
> +       %tmp = load float* %peakWeight, align 4         ; <float> [#uses=1]
> +       %tmp2 = icmp sgt i32 %bandEdgeIndex, 0          ; <i1> [#uses=1]
> +       br i1 %tmp2, label %bb.nph22, label %return
> +
> +bb.nph22:              ; preds = %entry
> +       %tmp3 = add i32 %bandEdgeIndex, -1              ; <i32> [#uses=2]
> +       br label %bb
> +
> +bb:            ; preds = %bb8, %bb.nph22
> +       %distERBhi.121 = phi float [ %distERBhi.2.lcssa, %bb8 ], [
> 0.000000e+00, %bb.nph22 ]            ; <float> [#uses=2]
> +       %distERBlo.120 = phi float [ %distERBlo.0.lcssa, %bb8 ], [
> 0.000000e+00, %bb.nph22 ]            ; <float> [#uses=2]
> +       %hiPart.119 = phi i32 [ %hiPart.0.lcssa, %bb8 ], [ 0, %bb.nph22 ]
>             ; <i32> [#uses=3]
> +       %loPart.118 = phi i32 [ %loPart.0.lcssa, %bb8 ], [ 0, %bb.nph22 ]
>             ; <i32> [#uses=2]
> +       %peakCount.117 = phi float [ %peakCount.2.lcssa, %bb8 ], [ %tmp,
> %bb.nph22 ]            ; <float> [#uses=2]
> +       %part.016 = phi i32 [ %tmp46, %bb8 ], [ 0, %bb.nph22 ]          ;
> <i32> [#uses=5]
> +       %tmp4 = icmp sgt i32 %part.016, 0               ; <i1> [#uses=1]
> +       br i1 %tmp4, label %bb1, label %bb3.preheader
> +
> +bb1:           ; preds = %bb
> +       %tmp5 = add i32 %part.016, -1           ; <i32> [#uses=1]
> +       %tmp6 = sext i32 %tmp5 to i64           ; <i64> [#uses=1]
> +       %tmp7 = getelementptr float* %pTmp1, i64 %tmp6          ; <float*>
> [#uses=1]
> +       %tmp8 = load float* %tmp7, align 4              ; <float> [#uses=1]
> +       %tmp9 = fadd float %tmp8, %distERBlo.120                ; <float>
> [#uses=1]
> +       %tmp10 = add i32 %part.016, -1          ; <i32> [#uses=1]
> +       %tmp11 = sext i32 %tmp10 to i64         ; <i64> [#uses=1]
> +       %tmp12 = getelementptr float* %pTmp1, i64 %tmp11                ;
> <float*> [#uses=1]
> +       %tmp13 = load float* %tmp12, align 4            ; <float> [#uses=1]
> +       %tmp14 = fsub float %distERBhi.121, %tmp13              ; <float>
> [#uses=1]
> +       br label %bb3.preheader
> +
> +bb3.preheader:         ; preds = %bb1, %bb
> +       %distERBlo.0.ph = phi float [ %distERBlo.120, %bb ], [ %tmp9, %bb1
> ]            ; <float> [#uses=3]
> +       %distERBhi.0.ph = phi float [ %distERBhi.121, %bb ], [ %tmp14,
> %bb1 ]           ; <float> [#uses=3]
> +       %tmp15 = fcmp ogt float %distERBlo.0.ph, 2.500000e+00           ;
> <i1> [#uses=1]
> +       br i1 %tmp15, label %bb.nph, label %bb5.preheader
> +
> +bb.nph:                ; preds = %bb3.preheader
> +       br label %bb2
> +
> +bb2:           ; preds = %bb3, %bb.nph
> +       %distERBlo.03 = phi float [ %tmp19, %bb3 ], [ %distERBlo.0.ph,
> %bb.nph ]                ; <float> [#uses=1]
> +       %loPart.02 = phi i32 [ %tmp24, %bb3 ], [ %loPart.118, %bb.nph ]
>     ; <i32> [#uses=3]
> +       %peakCount.01 = phi float [ %tmp23, %bb3 ], [ %peakCount.117,
> %bb.nph ]         ; <float> [#uses=1]
> +       %tmp16 = sext i32 %loPart.02 to i64             ; <i64> [#uses=1]
> +       %tmp17 = getelementptr float* %pTmp1, i64 %tmp16                ;
> <float*> [#uses=1]
> +       %tmp18 = load float* %tmp17, align 4            ; <float> [#uses=1]
> +       %tmp19 = fsub float %distERBlo.03, %tmp18               ; <float>
> [#uses=3]
> +       %tmp20 = sext i32 %loPart.02 to i64             ; <i64> [#uses=1]
> +       %tmp21 = getelementptr float* %peakWeight, i64 %tmp20           ;
> <float*> [#uses=1]
> +       %tmp22 = load float* %tmp21, align 4            ; <float> [#uses=1]
> +       %tmp23 = fsub float %peakCount.01, %tmp22               ; <float>
> [#uses=2]
> +       %tmp24 = add i32 %loPart.02, 1          ; <i32> [#uses=2]
> +       br label %bb3
> +
> +bb3:           ; preds = %bb2
> +       %tmp25 = fcmp ogt float %tmp19, 2.500000e+00            ; <i1>
> [#uses=1]
> +       br i1 %tmp25, label %bb2, label %bb3.bb5.preheader_crit_edge
> +
> +bb3.bb5.preheader_crit_edge:           ; preds = %bb3
> +       %tmp24.lcssa = phi i32 [ %tmp24, %bb3 ]         ; <i32> [#uses=1]
> +       %tmp23.lcssa = phi float [ %tmp23, %bb3 ]               ; <float>
> [#uses=1]
> +       %tmp19.lcssa = phi float [ %tmp19, %bb3 ]               ; <float>
> [#uses=1]
> +       br label %bb5.preheader
> +
> +bb5.preheader:         ; preds = %bb3.bb5.preheader_crit_edge,
> %bb3.preheader
> +       %distERBlo.0.lcssa = phi float [ %tmp19.lcssa,
> %bb3.bb5.preheader_crit_edge ], [ %distERBlo.0.ph, %bb3.preheader ]
>        ; <float> [#uses=2]
> +       %loPart.0.lcssa = phi i32 [ %tmp24.lcssa,
> %bb3.bb5.preheader_crit_edge ], [ %loPart.118, %bb3.preheader ]
>   ; <i32> [#uses=1]
> +       %peakCount.0.lcssa = phi float [ %tmp23.lcssa,
> %bb3.bb5.preheader_crit_edge ], [ %peakCount.117, %bb3.preheader ]
>     ; <float> [#uses=2]
> +       %.not10 = fcmp olt float %distERBhi.0.ph, 2.500000e+00          ;
> <i1> [#uses=1]
> +       %tmp26 = icmp sgt i32 %tmp3, %hiPart.119                ; <i1>
> [#uses=1]
> +       %or.cond11 = and i1 %tmp26, %.not10             ; <i1> [#uses=1]
> +       br i1 %or.cond11, label %bb.nph12, label %bb7
> +
> +bb.nph12:              ; preds = %bb5.preheader
> +       br label %bb4
> +
> +bb4:           ; preds = %bb5, %bb.nph12
> +       %distERBhi.29 = phi float [ %tmp30, %bb5 ], [ %distERBhi.0.ph,
> %bb.nph12 ]              ; <float> [#uses=1]
> +       %hiPart.08 = phi i32 [ %tmp31, %bb5 ], [ %hiPart.119, %bb.nph12 ]
>             ; <i32> [#uses=2]
> +       %peakCount.27 = phi float [ %tmp35, %bb5 ], [ %peakCount.0.lcssa,
> %bb.nph12 ]           ; <float> [#uses=1]
> +       %tmp27 = sext i32 %hiPart.08 to i64             ; <i64> [#uses=1]
> +       %tmp28 = getelementptr float* %pTmp1, i64 %tmp27                ;
> <float*> [#uses=1]
> +       %tmp29 = load float* %tmp28, align 4            ; <float> [#uses=1]
> +       %tmp30 = fadd float %tmp29, %distERBhi.29               ; <float>
> [#uses=3]
> +       %tmp31 = add i32 %hiPart.08, 1          ; <i32> [#uses=4]
> +       %tmp32 = sext i32 %tmp31 to i64         ; <i64> [#uses=1]
> +       %tmp33 = getelementptr float* %peakWeight, i64 %tmp32           ;
> <float*> [#uses=1]
> +       %tmp34 = load float* %tmp33, align 4            ; <float> [#uses=1]
> +       %tmp35 = fadd float %tmp34, %peakCount.27               ; <float>
> [#uses=2]
> +       br label %bb5
> +
> +bb5:           ; preds = %bb4
> +       %.not = fcmp olt float %tmp30, 2.500000e+00             ; <i1>
> [#uses=1]
> +       %tmp36 = icmp sgt i32 %tmp3, %tmp31             ; <i1> [#uses=1]
> +       %or.cond = and i1 %tmp36, %.not         ; <i1> [#uses=1]
> +       br i1 %or.cond, label %bb4, label %bb5.bb7_crit_edge
> +
> +bb5.bb7_crit_edge:             ; preds = %bb5
> +       %tmp35.lcssa = phi float [ %tmp35, %bb5 ]               ; <float>
> [#uses=1]
> +       %tmp31.lcssa = phi i32 [ %tmp31, %bb5 ]         ; <i32> [#uses=1]
> +       %tmp30.lcssa = phi float [ %tmp30, %bb5 ]               ; <float>
> [#uses=1]
> +       br label %bb7
> +
> +bb7:           ; preds = %bb5.bb7_crit_edge, %bb5.preheader
> +       %distERBhi.2.lcssa = phi float [ %tmp30.lcssa, %bb5.bb7_crit_edge
> ], [ %distERBhi.0.ph, %bb5.preheader ]                ; <float> [#uses=2]
> +       %hiPart.0.lcssa = phi i32 [ %tmp31.lcssa, %bb5.bb7_crit_edge ], [
> %hiPart.119, %bb5.preheader ]         ; <i32> [#uses=1]
> +       %peakCount.2.lcssa = phi float [ %tmp35.lcssa, %bb5.bb7_crit_edge
> ], [ %peakCount.0.lcssa, %bb5.preheader ]             ; <float> [#uses=2]
> +       %tmp37 = fadd float %distERBlo.0.lcssa, %distERBhi.2.lcssa
>      ; <float> [#uses=1]
> +       %tmp38 = fdiv float %peakCount.2.lcssa, %tmp37          ; <float>
> [#uses=1]
> +       %tmp39 = fmul float %tmp38, %tmp1               ; <float> [#uses=2]
> +       %tmp40 = fmul float %tmp39, %tmp39              ; <float> [#uses=2]
> +       %tmp41 = fmul float %tmp40, %tmp40              ; <float> [#uses=1]
> +       %tmp42 = fadd float %tmp41, 1.000000e+00                ; <float>
> [#uses=1]
> +       %tmp43 = fdiv float 1.000000e+00, %tmp42                ; <float>
> [#uses=1]
> +       %tmp44 = sext i32 %part.016 to i64              ; <i64> [#uses=1]
> +       %tmp45 = getelementptr float* %nrgReducePeakrate, i64 %tmp44
>      ; <float*> [#uses=1]
> +       store float %tmp43, float* %tmp45, align 4
> +       %tmp46 = add i32 %part.016, 1           ; <i32> [#uses=2]
> +       br label %bb8
> +
> +bb8:           ; preds = %bb7
> +       %tmp47 = icmp slt i32 %tmp46, %bandEdgeIndex            ; <i1>
> [#uses=1]
> +       br i1 %tmp47, label %bb, label %bb8.return_crit_edge
> +
> +bb8.return_crit_edge:          ; preds = %bb8
> +       br label %return
> +
> +return:                ; preds = %bb8.return_crit_edge, %entry
> +       ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20090710/c22f5b50/attachment.html>


More information about the llvm-commits mailing list