[llvm] r215647 - [Reassociation] Add support for reassociation with unsafe algebra.

Thu Nov 6 09:05:11 PST 2014

Hi Mehdi,
I've committed your patch in r221462; I wasn't sure if you had commit
access or not.  I've also CC'ed a few interested parties (i.e., Chandler
and Hal) in case they'd like to further comment.  This is something that's
been on my mind for some time and AFAICT this is the correct fix.

Thanks for working on this!

 Chad

> Hi Chad,
>
> After more digging, I found out that floating-point factorization is
> indeed
> ok for fast-math operations.
>
> The attached patch should fix the mixing of regular and fast operations
> without break fast-math factorization.
>
> Best,
>
> Mehdi
>
>
> 2014-11-05 23:09 GMT-08:00 Mehdi Amini <joker.eph at gmail.com>:
>
>> Hi Chad,
>>
>> This commit does not only bring reassociation but also factorization,
>> which I am not sure is allowed for floating point even in fast-math.
>> The attached patch disables floating point refactorization while keeping
>> reassociation.
>>
>> Moreover I have a crash currently when mixing fast and regular
>> operation,
>> see test/Transforms/Reassociate/mixed-fast-nonfast-fp.ll in the patch.
>>
>> Best,
>>
>> Mehdi
>>
>>
>>
>>
>> On 8/14/14, 8:23 AM, Chad Rosier wrote:
>>
>>> Author: mcrosier
>>> Date: Thu Aug 14 10:23:01 2014
>>> New Revision: 215647
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=215647&view=rev
>>> Log:
>>> [Reassociation] Add support for reassociation with unsafe algebra.
>>>
>>> Vector instructions are (still) not supported for either integer or
>>> floating
>>> point.  Hopefully, that work will be landed shortly.
>>>
>>> Added:
>>>      llvm/trunk/test/Transforms/Reassociate/fast-AgressiveSubMove.ll
>>>      llvm/trunk/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll
>>>      llvm/trunk/test/Transforms/Reassociate/fast-MissedTree.ll
>>>      llvm/trunk/test/Transforms/Reassociate/fast-ReassociateVector.ll
>>>      llvm/trunk/test/Transforms/Reassociate/fast-SubReassociate.ll
>>>      llvm/trunk/test/Transforms/Reassociate/fast-basictest.ll
>>>      llvm/trunk/test/Transforms/Reassociate/fast-fp-commute.ll
>>>      llvm/trunk/test/Transforms/Reassociate/fast-mightymul.ll
>>>      llvm/trunk/test/Transforms/Reassociate/fast-multistep.ll
>>> Modified:
>>>      llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp
>>>
>>> Modified: llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/
>>> Transforms/Scalar/Reassociate.cpp?rev=215647&r1=215646&r2=
>>> 215647&view=diff
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp (original)
>>> +++ llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp Thu Aug 14
>>> 10:23:01
>>> 2014
>>> @@ -240,6 +240,15 @@ static BinaryOperator *isReassociableOp(
>>>     return nullptr;
>>>   }
>>>   +static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
>>> +                                        unsigned Opcode2) {
>>> +  if (V->hasOneUse() && isa<Instruction>(V) &&
>>> +      (cast<Instruction>(V)->getOpcode() == Opcode1 ||
>>> +       cast<Instruction>(V)->getOpcode() == Opcode2))
>>> +    return cast<BinaryOperator>(V);
>>> +  return nullptr;
>>> +}
>>> +
>>>   static bool isUnmovableInstruction(Instruction *I) {
>>>     switch (I->getOpcode()) {
>>>     case Instruction::PHI:
>>> @@ -304,8 +313,10 @@ unsigned Reassociate::getRank(Value *V)
>>>       // If this is a not or neg instruction, do not count it for rank.
>>> This
>>>     // assures us that X and ~X will have the same rank.
>>> -  if (!I->getType()->isIntegerTy() ||
>>> -      (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
>>> +  Type *Ty = V->getType();
>>> +  if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) ||
>>> +      (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
>>> +       !BinaryOperator::isFNeg(I)))
>>>       ++Rank;
>>>       //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
>>> @@ -314,14 +325,50 @@ unsigned Reassociate::getRank(Value *V)
>>>     return ValueRankMap[I] = Rank;
>>>   }
>>>   +static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine
>>> &Name,
>>> +                                 Instruction *InsertBefore, Value
>>> *FlagsOp) {
>>> +  if (S1->getType()->isIntegerTy())
>>> +    return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
>>> +  else {
>>> +    BinaryOperator *Res =
>>> +        BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
>>> +    Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->
>>> getFastMathFlags());
>>> +    return Res;
>>> +  }
>>> +}
>>> +
>>> +static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine
>>> &Name,
>>> +                                 Instruction *InsertBefore, Value
>>> *FlagsOp) {
>>> +  if (S1->getType()->isIntegerTy())
>>> +    return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
>>> +  else {
>>> +    BinaryOperator *Res =
>>> +      BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
>>> +    Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->
>>> getFastMathFlags());
>>> +    return Res;
>>> +  }
>>> +}
>>> +
>>> +static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
>>> +                                 Instruction *InsertBefore, Value
>>> *FlagsOp) {
>>> +  if (S1->getType()->isIntegerTy())
>>> +    return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
>>> +  else {
>>> +    BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name,
>>> InsertBefore);
>>> +    Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->
>>> getFastMathFlags());
>>> +    return Res;
>>> +  }
>>> +}
>>> +
>>>   /// LowerNegateToMultiply - Replace 0-X with X*-1.
>>>   ///
>>>   static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
>>> -  Constant *Cst = Constant::getAllOnesValue(Neg->getType());
>>> +  Type *Ty = Neg->getType();
>>> +  Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(
>>> Ty)
>>> +                                       : ConstantFP::get(Ty, -1.0);
>>>   -  BinaryOperator *Res =
>>> -    BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
>>> -  Neg->setOperand(1, Constant::getNullValue(Neg->getType())); // Drop
>>> use of op.
>>> +  BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg,
>>> Neg);
>>> +  Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
>>>     Res->takeName(Neg);
>>>     Neg->replaceAllUsesWith(Res);
>>>     Res->setDebugLoc(Neg->getDebugLoc());
>>> @@ -377,13 +424,14 @@ static void IncorporateWeight(APInt &LHS
>>>       LHS = 0; // 1 + 1 === 0 modulo 2.
>>>       return;
>>>     }
>>> -  if (Opcode == Instruction::Add) {
>>> +  if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
>>>       // TODO: Reduce the weight by exploiting nsw/nuw?
>>>       LHS += RHS;
>>>       return;
>>>     }
>>>   -  assert(Opcode == Instruction::Mul && "Unknown associative
>>> operation!");
>>> +  assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul)
>>> &&
>>> +         "Unknown associative operation!");
>>>     unsigned Bitwidth = LHS.getBitWidth();
>>>     // If CM is the Carmichael number then a weight W satisfying W >=
>>> CM+Bitwidth
>>>     // can be replaced with W-CM.  That's because x^W=x^(W-CM) for
>>> every
>>> Bitwidth
>>> @@ -499,8 +547,7 @@ static bool LinearizeExprTree(BinaryOper
>>>     DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
>>>     unsigned Bitwidth = I->getType()->getScalarType()-
>>> >getPrimitiveSizeInBits();
>>>     unsigned Opcode = I->getOpcode();
>>> -  assert(Instruction::isAssociative(Opcode) &&
>>> -         Instruction::isCommutative(Opcode) &&
>>> +  assert(I->isAssociative() && I->isCommutative() &&
>>>            "Expected an associative and commutative operation!");
>>>       // Visit all operands of the expression, keeping track of their
>>> weight (the
>>> @@ -619,15 +666,16 @@ static bool LinearizeExprTree(BinaryOper
>>>           // If this is a multiply expression, turn any internal
>>> negations into
>>>         // multiplies by -1 so they can be reassociated.
>>> -      BinaryOperator *BO = dyn_cast<BinaryOperator>(Op);
>>> -      if (Opcode == Instruction::Mul && BO &&
>>> BinaryOperator::isNeg(BO))
>>> {
>>> -        DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ")
>>> TO
>>> ");
>>> -        BO = LowerNegateToMultiply(BO);
>>> -        DEBUG(dbgs() << *BO << 'n');
>>> -        Worklist.push_back(std::make_pair(BO, Weight));
>>> -        MadeChange = true;
>>> -        continue;
>>> -      }
>>> +      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
>>> +        if ((Opcode == Instruction::Mul && BinaryOperator::isNeg(BO))
>>> ||
>>> +            (Opcode == Instruction::FMul &&
>>> BinaryOperator::isFNeg(BO)))
>>> {
>>> +          DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight <<
>>> ")
>>> TO ");
>>> +          BO = LowerNegateToMultiply(BO);
>>> +          DEBUG(dbgs() << *BO << '\n');
>>> +          Worklist.push_back(std::make_pair(BO, Weight));
>>> +          MadeChange = true;
>>> +          continue;
>>> +        }
>>>           // Failed to morph into an expression of the right type.
>>> This
>>> really is
>>>         // a leaf.
>>> @@ -798,6 +846,8 @@ void Reassociate::RewriteExprTree(Binary
>>>         Constant *Undef = UndefValue::get(I->getType());
>>>         NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
>>>                                        Undef, Undef, "", I);
>>> +      if (NewOp->getType()->isFloatingPointTy())
>>> +        NewOp->setFastMathFlags(I->getFastMathFlags());
>>>       } else {
>>>         NewOp = NodesToRewrite.pop_back_val();
>>>       }
>>> @@ -817,7 +867,14 @@ void Reassociate::RewriteExprTree(Binary
>>>     // expression tree is dominated by all of Ops.
>>>     if (ExpressionChanged)
>>>       do {
>>> -      ExpressionChanged->clearSubclassOptionalData();
>>> +      // Preserve FastMathFlags.
>>> +      if (isa<FPMathOperator>(I)) {
>>> +        FastMathFlags Flags = I->getFastMathFlags();
>>> +        ExpressionChanged->clearSubclassOptionalData();
>>> +        ExpressionChanged->setFastMathFlags(Flags);
>>> +      } else
>>> +        ExpressionChanged->clearSubclassOptionalData();
>>> +
>>>         if (ExpressionChanged == I)
>>>           break;
>>>         ExpressionChanged->moveBefore(I);
>>> @@ -834,6 +891,8 @@ void Reassociate::RewriteExprTree(Binary
>>>   /// version of the value is returned, and BI is left pointing at the
>>> instruction
>>>   /// that should be processed next by the reassociation pass.
>>>   static Value *NegateValue(Value *V, Instruction *BI) {
>>> +  if (ConstantFP *C = dyn_cast<ConstantFP>(V))
>>> +    return ConstantExpr::getFNeg(C);
>>>     if (Constant *C = dyn_cast<Constant>(V))
>>>       return ConstantExpr::getNeg(C);
>>>   @@ -846,7 +905,8 @@ static Value *NegateValue(Value *V, Inst
>>>     // the constants.  We assume that instcombine will clean up the
>>> mess
>>> later if
>>>     // we introduce tons of unnecessary negation instructions.
>>>     //
>>> -  if (BinaryOperator *I = isReassociableOp(V, Instruction::Add)) {
>>> +  if (BinaryOperator *I =
>>> +          isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
>>>       // Push the negates through the add.
>>>       I->setOperand(0, NegateValue(I->getOperand(0), BI));
>>>       I->setOperand(1, NegateValue(I->getOperand(1), BI));
>>> @@ -864,7 +924,8 @@ static Value *NegateValue(Value *V, Inst
>>>     // Okay, we need to materialize a negated version of V with an
>>> instruction.
>>>     // Scan the use lists of V to see if we have one already.
>>>     for (User *U : V->users()) {
>>> -    if (!BinaryOperator::isNeg(U)) continue;
>>> +    if (!BinaryOperator::isNeg(U) && !BinaryOperator::isFNeg(U))
>>> +      continue;
>>>         // We found one!  Now we have to make sure that the definition
>>> dominates
>>>       // this use.  We do this by moving it to the entry block (if it
>>> is a
>>> @@ -894,27 +955,30 @@ static Value *NegateValue(Value *V, Inst
>>>       // Insert a 'neg' instruction that subtracts the value from zero
>>> to
>>> get the
>>>     // negation.
>>> -  return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
>>> +  return CreateNeg(V, V->getName() + ".neg", BI, BI);
>>>   }
>>>     /// ShouldBreakUpSubtract - Return true if we should break up this
>>> subtract of
>>>   /// X-Y into (X + -Y).
>>>   static bool ShouldBreakUpSubtract(Instruction *Sub) {
>>>     // If this is a negation, we can't split it up!
>>> -  if (BinaryOperator::isNeg(Sub))
>>> +  if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub))
>>>       return false;
>>>       // Don't bother to break this up unless either the LHS is an
>>> associable add or
>>>     // subtract or if this is only used by one.
>>> -  if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
>>> -      isReassociableOp(Sub->getOperand(0), Instruction::Sub))
>>> +  Value *V0 = Sub->getOperand(0);
>>> +  if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
>>> +      isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
>>>       return true;
>>> -  if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
>>> -      isReassociableOp(Sub->getOperand(1), Instruction::Sub))
>>> +  Value *V1 = Sub->getOperand(1);
>>> +  if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
>>> +      isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
>>>       return true;
>>> +  Value *VB = Sub->user_back();
>>>     if (Sub->hasOneUse() &&
>>> -      (isReassociableOp(Sub->user_back(), Instruction::Add) ||
>>> -       isReassociableOp(Sub->user_back(), Instruction::Sub)))
>>> +      (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
>>> +       isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
>>>       return true;
>>>       return false;
>>> @@ -931,8 +995,7 @@ static BinaryOperator *BreakUpSubtract(I
>>>     // and set it as the RHS of the add instruction we just made.
>>>     //
>>>     Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
>>> -  BinaryOperator *New =
>>> -    BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
>>> +  BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub,
>>> Sub);
>>>     Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop
>>> use of op.
>>>     Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop
>>> use of op.
>>>     New->takeName(Sub);
>>> @@ -988,15 +1051,16 @@ static Value *EmitAddTreeOfValues(Instru
>>>     Value *V1 = Ops.back();
>>>     Ops.pop_back();
>>>     Value *V2 = EmitAddTreeOfValues(I, Ops);
>>> -  return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
>>> +  return CreateAdd(V2, V1, "tmp", I, I);
>>>   }
>>>     /// RemoveFactorFromExpression - If V is an expression tree that is
>>> a
>>>   /// multiplication sequence, and if this sequence contains a multiply
>>> by Factor,
>>>   /// remove Factor from the tree and return the new tree.
>>>   Value *Reassociate::RemoveFactorFromExpression(Value *V, Value
>>> *Factor) {
>>> -  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
>>> -  if (!BO) return nullptr;
>>> +  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul,
>>> Instruction::FMul);
>>> +  if (!BO)
>>> +    return nullptr;
>>>       SmallVector<RepeatedValue, 8> Tree;
>>>     MadeChange |= LinearizeExprTree(BO, Tree);
>>> @@ -1018,13 +1082,25 @@ Value *Reassociate::RemoveFactorFromExpr
>>>       }
>>>         // If this is a negative version of this factor, remove it.
>>> -    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
>>> +    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
>>>         if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
>>>           if (FC1->getValue() == -FC2->getValue()) {
>>>             FoundFactor = NeedsNegate = true;
>>>             Factors.erase(Factors.begin()+i);
>>>             break;
>>>           }
>>> +    } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
>>> +      if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
>>> +        APFloat F1(FC1->getValueAPF());
>>> +        APFloat F2(FC2->getValueAPF());
>>> +        F2.changeSign();
>>> +        if (F1.compare(F2) == APFloat::cmpEqual) {
>>> +          FoundFactor = NeedsNegate = true;
>>> +          Factors.erase(Factors.begin() + i);
>>> +          break;
>>> +        }
>>> +      }
>>> +    }
>>>     }
>>>       if (!FoundFactor) {
>>> @@ -1046,7 +1122,7 @@ Value *Reassociate::RemoveFactorFromExpr
>>>     }
>>>       if (NeedsNegate)
>>> -    V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
>>> +    V = CreateNeg(V, "neg", InsertPt, BO);
>>>       return V;
>>>   }
>>> @@ -1058,7 +1134,7 @@ Value *Reassociate::RemoveFactorFromExpr
>>>   static void FindSingleUseMultiplyFactors(Value *V,
>>>                                            SmallVectorImpl<Value*>
>>> &Factors,
>>>                                          const
>>> SmallVectorImpl<ValueEntry> &Ops) {
>>> -  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
>>> +  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul,
>>> Instruction::FMul);
>>>     if (!BO) {
>>>       Factors.push_back(V);
>>>       return;
>>> @@ -1389,13 +1465,15 @@ Value *Reassociate::OptimizeAdd(Instruct
>>>         ++NumFactor;
>>>           // Insert a new multiply.
>>> -      Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()),
>>> NumFound);
>>> -      Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
>>> +      Type *Ty = TheOp->getType();
>>> +      Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound)
>>> +                                      : ConstantFP::get(Ty, NumFound);
>>> +      Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
>>>           // Now that we have inserted a multiply, optimize it. This
>>> allows us to
>>>         // handle cases that require multiple factoring steps, such as
>>> this:
>>>         // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
>>> -      RedoInsts.insert(cast<Instruction>(Mul));
>>> +      RedoInsts.insert(Mul);
>>>           // If every add operand was a duplicate, return the multiply.
>>>         if (Ops.empty())
>>> @@ -1412,11 +1490,12 @@ Value *Reassociate::OptimizeAdd(Instruct
>>>       }
>>>         // Check for X and -X or X and ~X in the operand list.
>>> -    if (!BinaryOperator::isNeg(TheOp) &&
>>> !BinaryOperator::isNot(TheOp))
>>> +    if (!BinaryOperator::isNeg(TheOp) &&
>>> !BinaryOperator::isFNeg(TheOp)
>>> &&
>>> +        !BinaryOperator::isNot(TheOp))
>>>         continue;
>>>         Value *X = nullptr;
>>> -    if (BinaryOperator::isNeg(TheOp))
>>> +    if (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp))
>>>         X = BinaryOperator::getNegArgument(TheOp);
>>>       else if (BinaryOperator::isNot(TheOp))
>>>         X = BinaryOperator::getNotArgument(TheOp);
>>> @@ -1426,7 +1505,8 @@ Value *Reassociate::OptimizeAdd(Instruct
>>>         continue;
>>>         // Remove X and -X from the operand list.
>>> -    if (Ops.size() == 2 && BinaryOperator::isNeg(TheOp))
>>> +    if (Ops.size() == 2 &&
>>> +        (BinaryOperator::isNeg(TheOp) ||
>>> BinaryOperator::isFNeg(TheOp)))
>>>         return Constant::getNullValue(X->getType());
>>>         // Remove X and ~X from the operand list.
>>> @@ -1463,7 +1543,8 @@ Value *Reassociate::OptimizeAdd(Instruct
>>>     unsigned MaxOcc = 0;
>>>     Value *MaxOccVal = nullptr;
>>>     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
>>> -    BinaryOperator *BOp = isReassociableOp(Ops[i].Op,
>>> Instruction::Mul);
>>> +    BinaryOperator *BOp =
>>> +        isReassociableOp(Ops[i].Op, Instruction::Mul,
>>> Instruction::FMul);
>>>       if (!BOp)
>>>         continue;
>>>   @@ -1476,23 +1557,43 @@ Value *Reassociate::OptimizeAdd(Instruct
>>>       SmallPtrSet<Value*, 8> Duplicates;
>>>       for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
>>>         Value *Factor = Factors[i];
>>> -      if (!Duplicates.insert(Factor)) continue;
>>> +      if (!Duplicates.insert(Factor))
>>> +        continue;
>>>           unsigned Occ = ++FactorOccurrences[Factor];
>>> -      if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
>>> +      if (Occ > MaxOcc) {
>>> +        MaxOcc = Occ;
>>> +        MaxOccVal = Factor;
>>> +      }
>>>           // If Factor is a negative constant, add the negated value as
>>> a
>>> factor
>>>         // because we can percolate the negate out.  Watch for minint,
>>> which
>>>         // cannot be positivified.
>>> -      if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
>>> +      if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
>>>           if (CI->isNegative() && !CI->isMinValue(true)) {
>>>             Factor = ConstantInt::get(CI->getContext(),
>>> -CI->getValue());
>>>             assert(!Duplicates.count(Factor) &&
>>>                    "Shouldn't have two constant factors, missed a
>>> canonicalize");
>>> -
>>>             unsigned Occ = ++FactorOccurrences[Factor];
>>> -          if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
>>> +          if (Occ > MaxOcc) {
>>> +            MaxOcc = Occ;
>>> +            MaxOccVal = Factor;
>>> +          }
>>>           }
>>> +      } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
>>> +        if (CF->isNegative()) {
>>> +          APFloat F(CF->getValueAPF());
>>> +          F.changeSign();
>>> +          Factor = ConstantFP::get(CF->getContext(), F);
>>> +          assert(!Duplicates.count(Factor) &&
>>> +                 "Shouldn't have two constant factors, missed a
>>> canonicalize");
>>> +          unsigned Occ = ++FactorOccurrences[Factor];
>>> +          if (Occ > MaxOcc) {
>>> +            MaxOcc = Occ;
>>> +            MaxOccVal = Factor;
>>> +          }
>>> +        }
>>> +      }
>>>       }
>>>     }
>>>   @@ -1505,11 +1606,16 @@ Value *Reassociate::OptimizeAdd(Instruct
>>>       // this, we could otherwise run into situations where removing a
>>> factor
>>>       // from an expression will drop a use of maxocc, and this can
>>> cause
>>>       // RemoveFactorFromExpression on successive values to behave
>>> differently.
>>> -    Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal,
>>> MaxOccVal);
>>> +    Instruction *DummyInst =
>>> +        I->getType()->isIntegerTy()
>>> +            ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
>>> +            : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
>>> +
>>>       SmallVector<WeakVH, 4> NewMulOps;
>>>       for (unsigned i = 0; i != Ops.size(); ++i) {
>>>         // Only try to remove factors from expressions we're allowed
>>> to.
>>> -      BinaryOperator *BOp = isReassociableOp(Ops[i].Op,
>>> Instruction::Mul);
>>> +      BinaryOperator *BOp =
>>> +          isReassociableOp(Ops[i].Op, Instruction::Mul,
>>> Instruction::FMul);
>>>         if (!BOp)
>>>           continue;
>>>   @@ -1542,7 +1648,7 @@ Value *Reassociate::OptimizeAdd(Instruct
>>>         RedoInsts.insert(VI);
>>>         // Create the multiply.
>>> -    Instruction *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp",
>>> I);
>>> +    Instruction *V2 = CreateMul(V, MaxOccVal, "tmp", I, I);
>>>         // Rerun associate on the multiply in case the inner expression
>>> turned into
>>>       // a multiply.  We want to make sure that we keep things in
>>> canonical form.
>>> @@ -1632,7 +1738,10 @@ static Value *buildMultiplyTree(IRBuilde
>>>       Value *LHS = Ops.pop_back_val();
>>>     do {
>>> -    LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
>>> +    if (LHS->getType()->isIntegerTy())
>>> +      LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
>>> +    else
>>> +      LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
>>>     } while (!Ops.empty());
>>>       return LHS;
>>> @@ -1765,11 +1874,13 @@ Value *Reassociate::OptimizeExpression(B
>>>       break;
>>>       case Instruction::Add:
>>> +  case Instruction::FAdd:
>>>       if (Value *Result = OptimizeAdd(I, Ops))
>>>         return Result;
>>>       break;
>>>       case Instruction::Mul:
>>> +  case Instruction::FMul:
>>>       if (Value *Result = OptimizeMul(I, Ops))
>>>         return Result;
>>>       break;
>>> @@ -1810,8 +1921,7 @@ void Reassociate::OptimizeInst(Instructi
>>>     if (!isa<BinaryOperator>(I))
>>>       return;
>>>   -  if (I->getOpcode() == Instruction::Shl &&
>>> -      isa<ConstantInt>(I->getOperand(1)))
>>> +  if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->
>>> getOperand(1)))
>>>       // If an operand of this shift is a reassociable multiply, or if
>>> the shift
>>>       // is used by a reassociable multiply or add, turn into a
>>> multiply.
>>>       if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
>>> @@ -1824,28 +1934,33 @@ void Reassociate::OptimizeInst(Instructi
>>>         I = NI;
>>>       }
>>>   -  // Floating point binary operators are not associative, but we can
>>> still
>>> -  // commute (some) of them, to canonicalize the order of their
>>> operands.
>>> -  // This can potentially expose more CSE opportunities, and makes
>>> writing
>>> -  // other transformations simpler.
>>> -  if ((I->getType()->isFloatingPointTy() ||
>>> I->getType()->isVectorTy())) {
>>> -    // FAdd and FMul can be commuted.
>>> -    if (I->getOpcode() != Instruction::FMul &&
>>> -        I->getOpcode() != Instruction::FAdd)
>>> -      return;
>>> +  // Commute floating point binary operators, to canonicalize the
>>> order
>>> of their
>>> +  // operands.  This can potentially expose more CSE opportunities,
>>> and
>>> makes
>>> +  // writing other transformations simpler.
>>> +  if (I->getType()->isFloatingPointTy() || I->getType()->isVectorTy())
>>> {
>>>   -    Value *LHS = I->getOperand(0);
>>> -    Value *RHS = I->getOperand(1);
>>> -    unsigned LHSRank = getRank(LHS);
>>> -    unsigned RHSRank = getRank(RHS);
>>> -
>>> -    // Sort the operands by rank.
>>> -    if (RHSRank < LHSRank) {
>>> -      I->setOperand(0, RHS);
>>> -      I->setOperand(1, LHS);
>>> +    // FAdd and FMul can be commuted.
>>> +    if (I->getOpcode() == Instruction::FMul ||
>>> +        I->getOpcode() == Instruction::FAdd) {
>>> +      Value *LHS = I->getOperand(0);
>>> +      Value *RHS = I->getOperand(1);
>>> +      unsigned LHSRank = getRank(LHS);
>>> +      unsigned RHSRank = getRank(RHS);
>>> +
>>> +      // Sort the operands by rank.
>>> +      if (RHSRank < LHSRank) {
>>> +        I->setOperand(0, RHS);
>>> +        I->setOperand(1, LHS);
>>> +      }
>>>       }
>>>   -    return;
>>> +    // FIXME: We should commute vector instructions as well.  However,
>>> this
>>> +    // requires further analysis to determine the effect on later
>>> passes.
>>> +
>>> +    // Don't try to optimize vector instructions or anything that
>>> doesn't have
>>> +    // unsafe algebra.
>>> +    if (I->getType()->isVectorTy() || !I->hasUnsafeAlgebra())
>>> +      return;
>>>     }
>>>       // Do not reassociate boolean (i1) expressions.  We want to
>>> preserve the
>>> @@ -1877,6 +1992,24 @@ void Reassociate::OptimizeInst(Instructi
>>>           I = NI;
>>>         }
>>>       }
>>> +  } else if (I->getOpcode() == Instruction::FSub) {
>>> +    if (ShouldBreakUpSubtract(I)) {
>>> +      Instruction *NI = BreakUpSubtract(I);
>>> +      RedoInsts.insert(I);
>>> +      MadeChange = true;
>>> +      I = NI;
>>> +    } else if (BinaryOperator::isFNeg(I)) {
>>> +      // Otherwise, this is a negation.  See if the operand is a
>>> multiply tree
>>> +      // and if this is not an inner node of a multiply tree.
>>> +      if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
>>> +          (!I->hasOneUse() ||
>>> +           !isReassociableOp(I->user_back(), Instruction::FMul))) {
>>> +        Instruction *NI = LowerNegateToMultiply(I);
>>> +        RedoInsts.insert(I);
>>> +        MadeChange = true;
>>> +        I = NI;
>>> +      }
>>> +    }
>>>     }
>>>       // If this instruction is an associative binary operator, process
>>> it.
>>> @@ -1894,11 +2027,16 @@ void Reassociate::OptimizeInst(Instructi
>>>     if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
>>>         cast<Instruction>(BO->user_back())->getOpcode() ==
>>> Instruction::Sub)
>>>       return;
>>> +  if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
>>> +      cast<Instruction>(BO->user_back())->getOpcode() ==
>>> Instruction::FSub)
>>> +    return;
>>>       ReassociateExpression(BO);
>>>   }
>>>     void Reassociate::ReassociateExpression(BinaryOperator *I) {
>>> +  assert(!I->getType()->isVectorTy() &&
>>> +         "Reassociation of vector instructions is not supported.");
>>>       // First, walk the expression tree, linearizing the tree,
>>> collecting the
>>>     // operand information.
>>> @@ -1943,12 +2081,21 @@ void Reassociate::ReassociateExpression(
>>>     // this is a multiply tree used only by an add, and the immediate
>>> is
>>> a -1.
>>>     // In this case we reassociate to put the negation on the outside
>>> so
>>> that we
>>>     // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
>>> -  if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
>>> -      cast<Instruction>(I->user_back())->getOpcode() ==
>>> Instruction::Add &&
>>> -      isa<ConstantInt>(Ops.back().Op) &&
>>> -      cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
>>> -    ValueEntry Tmp = Ops.pop_back_val();
>>> -    Ops.insert(Ops.begin(), Tmp);
>>> +  if (I->hasOneUse()) {
>>> +    if (I->getOpcode() == Instruction::Mul &&
>>> +        cast<Instruction>(I->user_back())->getOpcode() ==
>>> Instruction::Add &&
>>> +        isa<ConstantInt>(Ops.back().Op) &&
>>> +        cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
>>> +      ValueEntry Tmp = Ops.pop_back_val();
>>> +      Ops.insert(Ops.begin(), Tmp);
>>> +    } else if (I->getOpcode() == Instruction::FMul &&
>>> +               cast<Instruction>(I->user_back())->getOpcode() ==
>>> +                   Instruction::FAdd &&
>>> +               isa<ConstantFP>(Ops.back().Op) &&
>>> +               cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0))
>>> {
>>> +      ValueEntry Tmp = Ops.pop_back_val();
>>> +      Ops.insert(Ops.begin(), Tmp);
>>> +    }
>>>     }
>>>       DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-AgressiveSubMove.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-AgressiveSubMove.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-AgressiveSubMove.ll
>>> (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-AgressiveSubMove.ll Thu
>>> Aug 14 10:23:01 2014
>>> @@ -0,0 +1,24 @@
>>> +; RUN: opt < %s -reassociate -S | FileCheck %s
>>> +
>>> +define float @test1(float %A) {
>>> +; CHECK-LABEL: test1
>>> +; CHECK-NEXT: %X = fadd float 1.000000e+00, %A
>>> +; CHECK-NEXT: %Y = fadd float 1.000000e+00, %A
>>> +; CHECK-NEXT: %r = fsub float %X, %Y
>>> +; CHECK-NEXT: ret float %r
>>> +
>>> +  %X = fadd float %A, 1.000000e+00
>>> +  %Y = fadd float %A, 1.000000e+00
>>> +  %r = fsub float %X, %Y
>>> +  ret float %r
>>> +}
>>> +
>>> +define float @test2(float %A) {
>>> +; CHECK-LABEL: test2
>>> +; CHECK-NEXT: ret float 0.000000e+00
>>> +
>>> +  %X = fadd fast float 1.000000e+00, %A
>>> +  %Y = fadd fast float 1.000000e+00, %A
>>> +  %r = fsub fast float %X, %Y
>>> +  ret float %r
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-ArrayOutOfBounds.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll
>>> (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll Thu
>>> Aug 14 10:23:01 2014
>>> @@ -0,0 +1,65 @@
>>> +; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
>>> +
>>> +; Not marked as fast, so must not change.
>>> +define float @test1(float %a0, float %a1, float %a2, float %a3, float
>>> %a4) {
>>> +; CHECK-LABEL: test1
>>> +; CHECK-NEXT: %tmp.2 = fadd float %a3, %a4
>>> +; CHECK-NEXT: %tmp.4 = fadd float %tmp.2, %a2
>>> +; CHECK-NEXT: %tmp.6 = fadd float %tmp.4, %a1
>>> +; CHECK-NEXT: %tmp.8 = fadd float %tmp.6, %a0
>>> +; CHECK-NEXT: %tmp.11 = fadd float %a2, %a3
>>> +; CHECK-NEXT: %tmp.13 = fadd float %tmp.11, %a1
>>> +; CHECK-NEXT: %tmp.15 = fadd float %tmp.13, %a0
>>> +; CHECK-NEXT: %tmp.18 = fadd float %a1, %a2
>>> +; CHECK-NEXT: %tmp.20 = fadd float %tmp.18, %a0
>>> +; CHECK-NEXT: %tmp.23 = fadd float %a0, %a1
>>> +; CHECK-NEXT: %tmp.26 = fsub float %tmp.8, %tmp.15
>>> +; CHECK-NEXT: %tmp.28 = fadd float %tmp.20, %tmp.26
>>> +; CHECK-NEXT: %tmp.30 = fsub float %tmp.28, %tmp.23
>>> +; CHECK-NEXT: %tmp.32 = fsub float %tmp.30, %a4
>>> +; CHECK-NEXT: %tmp.34 = fsub float %tmp.32, %a2
>>> +; CHECK-NEXT: %T = fmul float %tmp.34, %tmp.34
>>> +; CHECK-NEXT: ret float %T
>>> +
>>> +  %tmp.2 = fadd float %a4, %a3
>>> +  %tmp.4 = fadd float %tmp.2, %a2
>>> +  %tmp.6 = fadd float %tmp.4, %a1
>>> +  %tmp.8 = fadd float %tmp.6, %a0
>>> +  %tmp.11 = fadd float %a3, %a2
>>> +  %tmp.13 = fadd float %tmp.11, %a1
>>> +  %tmp.15 = fadd float %tmp.13, %a0
>>> +  %tmp.18 = fadd float %a2, %a1
>>> +  %tmp.20 = fadd float %tmp.18, %a0
>>> +  %tmp.23 = fadd float %a1, %a0
>>> +  %tmp.26 = fsub float %tmp.8, %tmp.15
>>> +  %tmp.28 = fadd float %tmp.26, %tmp.20
>>> +  %tmp.30 = fsub float %tmp.28, %tmp.23
>>> +  %tmp.32 = fsub float %tmp.30, %a4
>>> +  %tmp.34 = fsub float %tmp.32, %a2
>>> +  %T = fmul float %tmp.34, %tmp.34
>>> +  ret float %T
>>> +}
>>> +
>>> +; Should be able to eliminate everything.
>>> +define float @test2(float %a0, float %a1, float %a2, float %a3, float
>>> %a4) {
>>> +; CHECK-LABEL: test2
>>> +; CHECK: ret float 0.000000e+00
>>> +
>>> +  %tmp.2 = fadd fast float %a4, %a3
>>> +  %tmp.4 = fadd fast float %tmp.2, %a2
>>> +  %tmp.6 = fadd fast float %tmp.4, %a1
>>> +  %tmp.8 = fadd fast float %tmp.6, %a0
>>> +  %tmp.11 = fadd fast float %a3, %a2
>>> +  %tmp.13 = fadd fast float %tmp.11, %a1
>>> +  %tmp.15 = fadd fast float %tmp.13, %a0
>>> +  %tmp.18 = fadd fast float %a2, %a1
>>> +  %tmp.20 = fadd fast float %tmp.18, %a0
>>> +  %tmp.23 = fadd fast float %a1, %a0
>>> +  %tmp.26 = fsub fast float %tmp.8, %tmp.15
>>> +  %tmp.28 = fadd fast float %tmp.26, %tmp.20
>>> +  %tmp.30 = fsub fast float %tmp.28, %tmp.23
>>> +  %tmp.32 = fsub fast float %tmp.30, %a4
>>> +  %tmp.34 = fsub fast float %tmp.32, %a2
>>> +  %T = fmul fast float %tmp.34, %tmp.34
>>> +  ret float %T
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-MissedTree.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-MissedTree.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-MissedTree.ll (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-MissedTree.ll Thu Aug
>>> 14
>>> 10:23:01 2014
>>> @@ -0,0 +1,11 @@
>>> +; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
>>> +
>>> +define float @test1(float %A, float %B) {
>>> +; CHECK-LABEL: test1
>>> +; CHECK: %Z = fadd fast float %A, %B
>>> +; CHECK: ret float %Z
>>> +       %W = fadd fast float %B, -5.0
>>> +       %Y = fadd fast float %A, 5.0
>>> +       %Z = fadd fast float %W, %Y
>>> +       ret float %Z
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-ReassociateVector.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-ReassociateVector.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-ReassociateVector.ll
>>> (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-ReassociateVector.ll
>>> Thu
>>> Aug 14 10:23:01 2014
>>> @@ -0,0 +1,25 @@
>>> +; RUN: opt < %s -reassociate -S | FileCheck %s
>>> +
>>> +; Don't handle floating point vector operations.
>>> +define <4 x float> @test1() {
>>> +; CHECK-LABEL: test1
>>> +; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer,
>>> zeroinitializer
>>> +; CHECK-NEXT: %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
>>> +
>>> +  %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
>>> +  %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
>>> +  ret <4 x float> %tmp2
>>> +}
>>> +
>>> +; We don't currently commute integer vector operations.
>>> +define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) {
>>> +; CHECK-LABEL: test2
>>> +; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y
>>> +; CHECK-NEXT: %tmp2 = add <2 x i32> %y, %x
>>> +; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2
>>> +
>>> +  %tmp1 = add <2 x i32> %x, %y
>>> +  %tmp2 = add <2 x i32> %y, %x
>>> +  %tmp3 = add <2 x i32> %tmp1, %tmp2
>>> +  ret <2 x i32> %tmp3
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-SubReassociate.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-SubReassociate.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-SubReassociate.ll
>>> (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-SubReassociate.ll Thu
>>> Aug 14 10:23:01 2014
>>> @@ -0,0 +1,70 @@
>>> +; RUN: opt < %s -reassociate -constprop -instcombine -S | FileCheck %s
>>> +
>>> +define float @test1(float %A, float %B) {
>>> +; CHECK-LABEL: test1
>>> +; CHECK-NEXT: %W = fadd float %B, 5.000000e+00
>>> +; CHECK-NEXT: %X = fadd float %A, -7.000000e+00
>>> +; CHECK-NEXT: %Y = fsub float %X, %W
>>> +; CHECK-NEXT: %Z = fadd float %Y, 1.200000e+01
>>> +; CHECK-NEXT: ret float %Z
>>> +
>>> +  %W = fadd float 5.0, %B
>>> +  %X = fadd float -7.0, %A
>>> +  %Y = fsub float %X, %W
>>> +  %Z = fadd float %Y, 12.0
>>> +  ret float %Z
>>> +}
>>> +
>>> +; With sub reassociation, constant folding can eliminate all of the
>>> constants.
>>> +define float @test2(float %A, float %B) {
>>> +; CHECK-LABEL: test2
>>> +; CHECK-NEXT: %Z = fsub fast float %A, %B
>>> +; CHECK-NEXT: ret float %Z
>>> +
>>> +  %W = fadd fast float %B, 5.000000e+00
>>> +  %X = fadd fast float %A, -7.000000e+00
>>> +  %Y = fsub fast float %X, %W
>>> +  %Z = fadd fast float %Y, 1.200000e+01
>>> +  ret float %Z
>>> +
>>> +}
>>> +
>>> +define float @test3(float %A, float %B, float %C, float %D) {
>>> +; CHECK-LABEL: test3
>>> +; CHECK-NEXT: %M = fadd float %A, 1.200000e+01
>>> +; CHECK-NEXT: %N = fadd float %M, %B
>>> +; CHECK-NEXT: %O = fadd float %N, %C
>>> +; CHECK-NEXT: %P = fsub float %D, %O
>>> +; CHECK-NEXT: %Q = fadd float %P, 1.200000e+01
>>> +; CHECK-NEXT: ret float %Q
>>> +
>>> +  %M = fadd float %A, 1.200000e+01
>>> +  %N = fadd float %M, %B
>>> +  %O = fadd float %N, %C
>>> +  %P = fsub float %D, %O
>>> +  %Q = fadd float %P, 1.200000e+01
>>> +  ret float %Q
>>> +}
>>> +
>>> +; With sub reassociation, constant folding can eliminate the two 12
>>> constants.
>>> +define float @test4(float %A, float %B, float %C, float %D) {
>>> +; CHECK-LABEL: test4
>>> +; CHECK-NEXT: %B.neg = fsub fast float -0.000000e+00, %B
>>> +; CHECK-NEXT: %O.neg = fsub fast float %B.neg, %A
>>> +; CHECK-NEXT: %P = fsub fast float %O.neg, %C
>>> +; CHECK-NEXT: %Q = fadd fast float %P, %D
>>> +; CHECK-NEXT: ret float %Q
>>> +
>>> +; FIXME: InstCombine should be able to get us to the following:
>>> +; %sum = fadd fast float %B, %A
>>> +; %sum1 = fadd fast float %sum, %C
>>> +; %Q = fsub fast float %D, %sum1
>>> +; ret i32 %Q
>>> +
>>> +  %M = fadd fast float 1.200000e+01, %A
>>> +  %N = fadd fast float %M, %B
>>> +  %O = fadd fast float %N, %C
>>> +  %P = fsub fast float %D, %O
>>> +  %Q = fadd fast float 1.200000e+01, %P
>>> +  ret float %Q
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-basictest.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-basictest.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-basictest.ll (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-basictest.ll Thu Aug 14
>>> 10:23:01 2014
>>> @@ -0,0 +1,285 @@
>>> +; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s
>>> +
>>> +; With reassociation, constant folding can eliminate the 12 and -12
>>> constants.
>>> +define float @test1(float %arg) {
>>> +; CHECK-LABEL: @test1
>>> +; CHECK-NEXT: fsub fast float -0.000000e+00, %arg
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %tmp1 = fsub fast float -1.200000e+01, %arg
>>> +  %tmp2 = fadd fast float %tmp1, 1.200000e+01
>>> +  ret float %tmp2
>>> +}
>>> +
>>> +define float @test2(float %reg109, float %reg1111) {
>>> +; CHECK-LABEL: @test2
>>> +; CHECK-NEXT: fadd float %reg109, -3.000000e+01
>>> +; CHECK-NEXT: fadd float %reg115, %reg1111
>>> +; CHECK-NEXT: fadd float %reg116, 3.000000e+01
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %reg115 = fadd float %reg109, -3.000000e+01
>>> +  %reg116 = fadd float %reg115, %reg1111
>>> +  %reg117 = fadd float %reg116, 3.000000e+01
>>> +  ret float %reg117
>>> +}
>>> +
>>> +define float @test3(float %reg109, float %reg1111) {
>>> +; CHECK-LABEL: @test3
>>> +; CHECK-NEXT: %reg117 = fadd fast float %reg109, %reg1111
>>> +; CHECK-NEXT:  ret float %reg117
>>> +
>>> +  %reg115 = fadd fast float %reg109, -3.000000e+01
>>> +  %reg116 = fadd fast float %reg115, %reg1111
>>> +  %reg117 = fadd fast float %reg116, 3.000000e+01
>>> +  ret float %reg117
>>> +}
>>> +
>>> + at fe = external global float
>>> + at fa = external global float
>>> + at fb = external global float
>>> + at fc = external global float
>>> + at ff = external global float
>>> +
>>> +define void @test4() {
>>> +; CHECK-LABEL: @test4
>>> +; CHECK: fadd fast float
>>> +; CHECK: fadd fast float
>>> +; CHECK-NOT: fadd fast float
>>> +; CHECK: ret void
>>> +
>>> +  %A = load float* @fa
>>> +  %B = load float* @fb
>>> +  %C = load float* @fc
>>> +  %t1 = fadd fast float %A, %B
>>> +  %t2 = fadd fast float %t1, %C
>>> +  %t3 = fadd fast float %C, %A
>>> +  %t4 = fadd fast float %t3, %B
>>> +  ; e = (a+b)+c;
>>> +  store float %t2, float* @fe
>>> +  ; f = (a+c)+b
>>> +  store float %t4, float* @ff
>>> +  ret void
>>> +}
>>> +
>>> +define void @test5() {
>>> +; CHECK-LABEL: @test5
>>> +; CHECK: fadd fast float
>>> +; CHECK: fadd fast float
>>> +; CHECK-NOT: fadd
>>> +; CHECK: ret void
>>> +
>>> +  %A = load float* @fa
>>> +  %B = load float* @fb
>>> +  %C = load float* @fc
>>> +  %t1 = fadd fast float %A, %B
>>> +  %t2 = fadd fast float %t1, %C
>>> +  %t3 = fadd fast float %C, %A
>>> +  %t4 = fadd fast float %t3, %B
>>> +  ; e = c+(a+b)
>>> +  store float %t2, float* @fe
>>> +  ; f = (c+a)+b
>>> +  store float %t4, float* @ff
>>> +  ret void
>>> +}
>>> +
>>> +define void @test6() {
>>> +; CHECK-LABEL: @test6
>>> +; CHECK: fadd fast float
>>> +; CHECK: fadd fast float
>>> +; CHECK-NOT: fadd
>>> +; CHECK: ret void
>>> +
>>> +  %A = load float* @fa
>>> +  %B = load float* @fb
>>> +  %C = load float* @fc
>>> +  %t1 = fadd fast float %B, %A
>>> +  %t2 = fadd fast float %t1, %C
>>> +  %t3 = fadd fast float %C, %A
>>> +  %t4 = fadd fast float %t3, %B
>>> +  ; e = c+(b+a)
>>> +  store float %t2, float* @fe
>>> +  ; f = (c+a)+b
>>> +  store float %t4, float* @ff
>>> +  ret void
>>> +}
>>> +
>>> +define float @test7(float %A, float %B, float %C) {
>>> +; CHECK-LABEL: @test7
>>> +; CHECK-NEXT: fadd fast float %C, %B
>>> +; CHECK-NEXT: fmul fast float %A, %A
>>> +; CHECK-NEXT: fmul fast float %1, %tmp2
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %aa = fmul fast float %A, %A
>>> +  %aab = fmul fast float %aa, %B
>>> +  %ac = fmul fast float %A, %C
>>> +  %aac = fmul fast float %ac, %A
>>> +  %r = fadd fast float %aab, %aac
>>> +  ret float %r
>>> +}
>>> +
>>> +define float @test8(float %X, float %Y, float %Z) {
>>> +; CHECK-LABEL: @test8
>>> +; CHECK-NEXT: fmul fast float %Y, %X
>>> +; CHECK-NEXT: fsub fast float %Z
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %A = fsub fast float 0.0, %X
>>> +  %B = fmul fast float %A, %Y
>>> +  ; (-X)*Y + Z -> Z-X*Y
>>> +  %C = fadd fast float %B, %Z
>>> +  ret float %C
>>> +}
>>> +
>>> +define float @test9(float %X) {
>>> +; CHECK-LABEL: @test9
>>> +; CHECK-NEXT: fmul fast float %X, 9.400000e+01
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %Y = fmul fast float %X, 4.700000e+01
>>> +  %Z = fadd fast float %Y, %Y
>>> +  ret float %Z
>>> +}
>>> +
>>> +define float @test10(float %X) {
>>> +; CHECK-LABEL: @test10
>>> +; CHECK-NEXT: fmul fast float %X, 3.000000e+00
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %Y = fadd fast float %X ,%X
>>> +  %Z = fadd fast float %Y, %X
>>> +  ret float %Z
>>> +}
>>> +
>>> +define float @test11(float %W) {
>>> +; CHECK-LABEL: test11
>>> +; CHECK-NEXT: fmul fast float %W, 3.810000e+02
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %X = fmul fast float %W, 127.0
>>> +  %Y = fadd fast float %X ,%X
>>> +  %Z = fadd fast float %Y, %X
>>> +  ret float %Z
>>> +}
>>> +
>>> +define float @test12(float %X) {
>>> +; CHECK-LABEL: @test12
>>> +; CHECK-NEXT: fmul fast float %X, -3.000000e+00
>>> +; CHECK-NEXT: fadd fast float %factor, 6.000000e+00
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %A = fsub fast float 1.000000e+00, %X
>>> +  %B = fsub fast float 2.000000e+00, %X
>>> +  %C = fsub fast float 3.000000e+00, %X
>>> +  %Y = fadd fast float %A ,%B
>>> +  %Z = fadd fast float %Y, %C
>>> +  ret float %Z
>>> +}
>>> +
>>> +define float @test13(float %X1, float %X2, float %X3) {
>>> +; CHECK-LABEL: @test13
>>> +; CHECK-NEXT: fsub fast float %X3, %X2
>>> +; CHECK-NEXT: fmul fast float {{.*}}, %X1
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %A = fsub fast float 0.000000e+00, %X1
>>> +  %B = fmul fast float %A, %X2   ; -X1*X2
>>> +  %C = fmul fast float %X1, %X3  ; X1*X3
>>> +  %D = fadd fast float %B, %C    ; -X1*X2 + X1*X3 -> X1*(X3-X2)
>>> +  ret float %D
>>> +}
>>> +
>>> +define float @test14(float %X1, float %X2) {
>>> +; CHECK-LABEL: @test14
>>> +; CHECK-NEXT: fsub fast float %X1, %X2
>>> +; CHECK-NEXT: fmul fast float %tmp, 4.700000e+01
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %B = fmul fast float %X1, 47.   ; X1*47
>>> +  %C = fmul fast float %X2, -47.  ; X2*-47
>>> +  %D = fadd fast float %B, %C    ; X1*47 + X2*-47 -> 47*(X1-X2)
>>> +  ret float %D
>>> +}
>>> +
>>> +define float @test15(float %arg) {
>>> +; CHECK-LABEL: test15
>>> +; CHECK-NEXT: fmul fast float %arg, 1.440000e+02
>>> +; CHECK-NEXT: ret float %tmp2
>>> +
>>> +  %tmp1 = fmul fast float 1.200000e+01, %arg
>>> +  %tmp2 = fmul fast float %tmp1, 1.200000e+01
>>> +  ret float %tmp2
>>> +}
>>> +
>>> +; (b+(a+1234))+-a -> b+1234
>>> +define float @test16(float %b, float %a) {
>>> +; CHECK-LABEL: @test16
>>> +; CHECK-NEXT: fadd fast float %b, 1.234000e+03
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %1 = fadd fast float %a, 1234.0
>>> +  %2 = fadd fast float %b, %1
>>> +  %3 = fsub fast float 0.0, %a
>>> +  %4 = fadd fast float %2, %3
>>> +  ret float %4
>>> +}
>>> +
>>> +; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z.
>>> +
>>> +define float @test17(float %a, float %b, float %z) {
>>> +; CHECK-LABEL: test17
>>> +; CHECK-NEXT: fmul fast float %a, 1.234500e+04
>>> +; CHECK-NEXT: fmul fast float %e, %b
>>> +; CHECK-NEXT: fmul fast float %f, %z
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %c = fsub fast float 0.000000e+00, %z
>>> +  %d = fmul fast float %a, %b
>>> +  %e = fmul fast float %c, %d
>>> +  %f = fmul fast float %e, 1.234500e+04
>>> +  %g = fsub fast float 0.000000e+00, %f
>>> +  ret float %g
>>> +}
>>> +
>>> +define float @test18(float %a, float %b, float %z) {
>>> +; CHECK-LABEL: test18
>>> +; CHECK-NEXT: fmul fast float %a, 4.000000e+01
>>> +; CHECK-NEXT: fmul fast float %e, %z
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %d = fmul fast float %z, 4.000000e+01
>>> +  %c = fsub fast float 0.000000e+00, %d
>>> +  %e = fmul fast float %a, %c
>>> +  %f = fsub fast float 0.000000e+00, %e
>>> +  ret float %f
>>> +}
>>> +
>>> +; With sub reassociation, constant folding can eliminate the 12 and
>>> -12
>>> constants.
>>> +define float @test19(float %A, float %B) {
>>> +; CHECK-LABEL: @test19
>>> +; CHECK-NEXT: fsub fast float %A, %B
>>> +; CHECK-NEXT: ret float
>>> +  %X = fadd fast float -1.200000e+01, %A
>>> +  %Y = fsub fast float %X, %B
>>> +  %Z = fadd fast float %Y, 1.200000e+01
>>> +  ret float %Z
>>> +}
>>> +
>>> +; With sub reassociation, constant folding can eliminate the uses of
>>> %a.
>>> +define float @test20(float %a, float %b, float %c) nounwind  {
>>> +; CHECK-LABEL: @test20
>>> +; CHECK-NEXT: fsub fast float -0.000000e+00, %b
>>> +; CHECK-NEXT: fsub fast float %b.neg, %c
>>> +; CHECK-NEXT: ret float
>>> +
>>> +; FIXME: Should be able to generate the below, which may expose more
>>> +;        opportunites for FAdd reassociation.
>>> +; %sum = fadd fast float %c, %b
>>> +; %tmp7 = fsub fast float 0, %sum
>>> +
>>> +  %tmp3 = fsub fast float %a, %b
>>> +  %tmp5 = fsub fast float %tmp3, %c
>>> +  %tmp7 = fsub fast float %tmp5, %a
>>> +  ret float %tmp7
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-fp-commute.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-fp-commute.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-fp-commute.ll (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-fp-commute.ll Thu Aug
>>> 14
>>> 10:23:01 2014
>>> @@ -0,0 +1,44 @@
>>> +; RUN: opt -reassociate -S < %s | FileCheck %s
>>> +
>>> +declare void @use(float)
>>> +
>>> +define void @test1(float %x, float %y) {
>>> +; CHECK-LABEL: test1
>>> +; CHECK: fmul fast float %y, %x
>>> +; CHECK: fmul fast float %y, %x
>>> +; CHECK: fsub fast float %1, %2
>>> +; CHECK: call void @use(float %{{.*}})
>>> +; CHECK: call void @use(float %{{.*}})
>>> +
>>> +  %1 = fmul fast float %x, %y
>>> +  %2 = fmul fast float %y, %x
>>> +  %3 = fsub fast float %1, %2
>>> +  call void @use(float %1)
>>> +  call void @use(float %3)
>>> +  ret void
>>> +}
>>> +
>>> +define float @test2(float %x, float %y) {
>>> +; CHECK-LABEL: test2
>>> +; CHECK-NEXT: fmul fast float %y, %x
>>> +; CHECK-NEXT: fmul fast float %y, %x
>>> +; CHECK-NEXT: fsub fast float %1, %2
>>> +; CHECK-NEXT: ret float %3
>>> +
>>> +  %1 = fmul fast float %x, %y
>>> +  %2 = fmul fast float %y, %x
>>> +  %3 = fsub fast float %1, %2
>>> +  ret float %3
>>> +}
>>> +
>>> +define float @test3(float %x, float %y) {
>>> +; CHECK-LABEL: test3
>>> +; CHECK-NEXT: %factor = fmul fast float 2.000000e+00, %y
>>> +; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
>>> +; CHECK-NEXT: ret float %tmp1
>>> +
>>> +  %1 = fmul fast float %x, %y
>>> +  %2 = fmul fast float %y, %x
>>> +  %3 = fadd fast float %1, %2
>>> +  ret float %3
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-mightymul.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-mightymul.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-mightymul.ll (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-mightymul.ll Thu Aug 14
>>> 10:23:01 2014
>>> @@ -0,0 +1,35 @@
>>> +; RUN: opt < %s -reassociate -disable-output
>>> +; PR13021
>>> +
>>> +define float @test2(float %x) {
>>> +  %t0 = fmul fast float %x, %x
>>> +  %t1 = fmul fast float %t0, %t0
>>> +  %t2 = fmul fast float %t1, %t1
>>> +  %t3 = fmul fast float %t2, %t2
>>> +  %t4 = fmul fast float %t3, %t3
>>> +  %t5 = fmul fast float %t4, %t4
>>> +  %t6 = fmul fast float %t5, %t5
>>> +  %t7 = fmul fast float %t6, %t6
>>> +  %t8 = fmul fast float %t7, %t7
>>> +  %t9 = fmul fast float %t8, %t8
>>> +  %t10 = fmul fast float %t9, %t9
>>> +  %t11 = fmul fast float %t10, %t10
>>> +  %t12 = fmul fast float %t11, %t11
>>> +  %t13 = fmul fast float %t12, %t12
>>> +  %t14 = fmul fast float %t13, %t13
>>> +  %t15 = fmul fast float %t14, %t14
>>> +  %t16 = fmul fast float %t15, %t15
>>> +  %t17 = fmul fast float %t16, %t16
>>> +  %t18 = fmul fast float %t17, %t17
>>> +  %t19 = fmul fast float %t18, %t18
>>> +  %t20 = fmul fast float %t19, %t19
>>> +  %t21 = fmul fast float %t20, %t20
>>> +  %t22 = fmul fast float %t21, %t21
>>> +  %t23 = fmul fast float %t22, %t22
>>> +  %t24 = fmul fast float %t23, %t23
>>> +  %t25 = fmul fast float %t24, %t24
>>> +  %t26 = fmul fast float %t25, %t25
>>> +  %t27 = fmul fast float %t26, %t26
>>> +  %t28 = fmul fast float %t27, %t27
>>> +  ret float %t28
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/Reassociate/fast-multistep.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
>>> Transforms/Reassociate/fast-multistep.ll?rev=215647&view=auto
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/Transforms/Reassociate/fast-multistep.ll (added)
>>> +++ llvm/trunk/test/Transforms/Reassociate/fast-multistep.ll Thu Aug 14
>>> 10:23:01 2014
>>> @@ -0,0 +1,32 @@
>>> +; RUN: opt < %s -reassociate -S | FileCheck %s
>>> +
>>> +define float @fmultistep1(float %a, float %b, float %c) {
>>> +; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
>>> +; CHECK-LABEL: @fmultistep1
>>> +; CHECK-NEXT: fadd fast float %c, %b
>>> +; CHECK-NEXT: fmul fast float %a, %tmp2
>>> +; CHECK-NEXT: fmul fast float %tmp3, %a
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %t0 = fmul fast float %a, %b
>>> +  %t1 = fmul fast float %a, %t0 ; a*(a*b)
>>> +  %t2 = fmul fast float %a, %c
>>> +  %t3 = fmul fast float %a, %t2 ; a*(a*c)
>>> +  %t4 = fadd fast float %t1, %t3
>>> +  ret float %t4
>>> +}
>>> +
>>> +define float @fmultistep2(float %a, float %b, float %c, float %d) {
>>> +; Check that a*b+a*c+d is turned into a*(b+c)+d.
>>> +; CHECK-LABEL: @fmultistep2
>>> +; CHECK-NEXT: fadd fast float %c, %b
>>> +; CHECK-NEXT: fmul fast float %tmp, %a
>>> +; CHECK-NEXT: fadd fast float %tmp1, %d
>>> +; CHECK-NEXT: ret float
>>> +
>>> +  %t0 = fmul fast float %a, %b
>>> +  %t1 = fmul fast float %a, %c
>>> +  %t2 = fadd fast float %t1, %d ; a*c+d
>>> +  %t3 = fadd fast float %t0, %t2 ; a*b+(a*c+d)
>>> +  ret float %t3
>>> +}
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>>
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>>
>