[llvm-commits] [llvm] r158358 - in /llvm/trunk: include/llvm/Constants.h include/llvm/Instruction.h lib/Transforms/Scalar/Reassociate.cpp lib/VMCore/Constants.cpp lib/VMCore/Instruction.cpp test/Transforms/Reassociate/repeats.ll

Duncan Sands baldrick at free.fr
Tue Jun 12 13:07:10 PDT 2012


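Hi Matt, thanks for the great test case.  It seems to be a bug in SmallMap: it
assumes that the elements it stores (here a Value* key paired with an APInt
value) have POD type, so an erased slot gets assigned to again after its APInt
has already been destroyed.  Alternatively, maybe the stored types are required
to be POD types but I don't see that documented anywhere.  Hopefully Stepan can
clarify.  In the meantime I will change the code to use a std::map.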

Ciao, Duncan.

On 12/06/12 20:53, Matt Beaumont-Gay wrote:
> Here's the reduced input:
>
> typedef __uint128_t widelimb;
> typedef unsigned long long felem[4];
> typedef __uint128_t widefelem[7];
> static void felem_square(widefelem out, const felem in)  {
>    out[6] = ((widelimb) in[3]) * in[3];
> }
> void ec_GFp_nistp224_point_get_affine_coordinates()  {
>    felem z2;
>    widefelem tmp;
>    felem_square(tmp, z2);
> }
>
> (command line: clang -cc1 -emit-obj -O1 -o /dev/null /tmp/crasher.i)
>
> On Tue, Jun 12, 2012 at 11:17 AM, Matt Beaumont-Gay
> <matthewbg at google.com>  wrote:
>> This might also help:
>>
>> ==31759== ERROR: AddressSanitizer attempting double-free on 0x7f4fc6415580:
>>   #0 0x40b3c02 operator delete[]()
>>   #1 0x4006c54 llvm::APInt::AssignSlowCase()
>>   #2 0x39048eb std::pair<>::operator=()
>>   #3 0x39045c9 llvm::FlatArrayMap<>::insertInternal()
>>   #4 0x3902ee3 llvm::FlatArrayMap<>::insert()
>>   #5 0x3902a0c llvm::MultiImplMap<>::insert()
>>   #6 0x39013d1 llvm::MultiImplMap<>::operator[]()
>>   #7 0x38f12bf LinearizeExprTree()
>>   #8 0x38eff0c (anonymous namespace)::Reassociate::ReassociateExpression()
>>   #9 0x38ef101 (anonymous namespace)::Reassociate::OptimizeInst()
>>   #10 0x38ee154 (anonymous namespace)::Reassociate::runOnFunction()
>>   #11 0x3f02ac5 llvm::FPPassManager::runOnFunction()
>>   #12 0x3b9f498 (anonymous namespace)::CGPassManager::RunPassOnSCC()
>>   #13 0x3b9ed0e (anonymous namespace)::CGPassManager::RunAllPassesOnSCC()
>>   #14 0x3b9e4e2 (anonymous namespace)::CGPassManager::runOnModule()
>>   #15 0x3f0328c llvm::MPPassManager::runOnModule()
>>   #16 0x3f03c96 llvm::PassManagerImpl::run()
>>   #17 0x3f03e99 llvm::PassManager::run()
>>   #18 0x14b92a1 (anonymous namespace)::EmitAssemblyHelper::EmitAssembly()
>>   #19 0x14b8df7 clang::EmitBackendOutput()
>>   #20 0x14b25b3 clang::BackendConsumer::HandleTranslationUnit()
>>   #21 0x2193ba2 clang::ParseAST()
>>   #22 0x14b06b4 clang::CodeGenAction::ExecuteAction()
>>   #23 0x2005fb3 clang::FrontendAction::Execute()
>>   #24 0x1e960fa clang::CompilerInstance::ExecuteAction()
>>   #25 0x14abf1a clang::ExecuteCompilerInvocation()
>>   #26 0x148f499 cc1_main()
>>   #27 0x14a2b9b main
>>   #28 0x7f4fc9656d5d __libc_start_main
>> 0x7f4fc6415580 is located 0 bytes inside of 16-byte region
>> [0x7f4fc6415580,0x7f4fc6415590)
>> freed by thread T0 here:
>>   #0 0x40b3c02 operator delete[]()
>>   #1 0x39022bf llvm::FlatArrayMap<>::erase()
>>   #2 0x3902200 llvm::MultiImplMap<>::erase()
>>   #3 0x38f0ffc LinearizeExprTree()
>>   #4 0x38eff0c (anonymous namespace)::Reassociate::ReassociateExpression()
>>   #5 0x38ef101 (anonymous namespace)::Reassociate::OptimizeInst()
>>   #6 0x38ee154 (anonymous namespace)::Reassociate::runOnFunction()
>>   #7 0x3f02ac5 llvm::FPPassManager::runOnFunction()
>>   #8 0x3b9f498 (anonymous namespace)::CGPassManager::RunPassOnSCC()
>>   #9 0x3b9ed0e (anonymous namespace)::CGPassManager::RunAllPassesOnSCC()
>>   #10 0x3b9e4e2 (anonymous namespace)::CGPassManager::runOnModule()
>>   #11 0x3f0328c llvm::MPPassManager::runOnModule()
>>   #12 0x3f03c96 llvm::PassManagerImpl::run()
>>   #13 0x3f03e99 llvm::PassManager::run()
>>   #14 0x14b92a1 (anonymous namespace)::EmitAssemblyHelper::EmitAssembly()
>>   #15 0x14b8df7 clang::EmitBackendOutput()
>>   #16 0x14b25b3 clang::BackendConsumer::HandleTranslationUnit()
>>   #17 0x2193ba2 clang::ParseAST()
>>   #18 0x14b06b4 clang::CodeGenAction::ExecuteAction()
>>   #19 0x2005fb3 clang::FrontendAction::Execute()
>>   #20 0x1e960fa clang::CompilerInstance::ExecuteAction()
>>   #21 0x14abf1a clang::ExecuteCompilerInvocation()
>>   #22 0x148f499 cc1_main()
>>   #23 0x14a2b9b main
>>   #24 0x7f4fc9656d5d __libc_start_main
>> previously allocated by thread T0 here:
>>   #0 0x40b3a82 operator new[]()
>>   #1 0x4005c7f getMemory()
>>   #2 0x4006a2f llvm::APInt::AssignSlowCase()
>>   #3 0x38f12d2 LinearizeExprTree()
>>   #4 0x38eff0c (anonymous namespace)::Reassociate::ReassociateExpression()
>>   #5 0x38ef101 (anonymous namespace)::Reassociate::OptimizeInst()
>>   #6 0x38ee154 (anonymous namespace)::Reassociate::runOnFunction()
>>   #7 0x3f02ac5 llvm::FPPassManager::runOnFunction()
>>   #8 0x3b9f498 (anonymous namespace)::CGPassManager::RunPassOnSCC()
>>   #9 0x3b9ed0e (anonymous namespace)::CGPassManager::RunAllPassesOnSCC()
>>   #10 0x3b9e4e2 (anonymous namespace)::CGPassManager::runOnModule()
>>   #11 0x3f0328c llvm::MPPassManager::runOnModule()
>>   #12 0x3f03c96 llvm::PassManagerImpl::run()
>>   #13 0x3f03e99 llvm::PassManager::run()
>>   #14 0x14b92a1 (anonymous namespace)::EmitAssemblyHelper::EmitAssembly()
>>   #15 0x14b8df7 clang::EmitBackendOutput()
>>   #16 0x14b25b3 clang::BackendConsumer::HandleTranslationUnit()
>>   #17 0x2193ba2 clang::ParseAST()
>>   #18 0x14b06b4 clang::CodeGenAction::ExecuteAction()
>>   #19 0x2005fb3 clang::FrontendAction::Execute()
>>   #20 0x1e960fa clang::CompilerInstance::ExecuteAction()
>>   #21 0x14abf1a clang::ExecuteCompilerInvocation()
>>   #22 0x148f499 cc1_main()
>>   #23 0x14a2b9b main
>>
>> On Tue, Jun 12, 2012 at 11:04 AM, Matt Beaumont-Gay
>> <matthewbg at google.com>  wrote:
>>> This seems to have caused some heap corruption when building OpenSSL at -O1:
>>>
>>> #14 0x00007f45009c7806 in malloc_printerr (action=3,
>>>     str=0x7f4500a9b2f0 "double free or corruption (fasttop)",
>>>     ptr=<optimized out>) at malloc.c:6266
>>> #15 0x00007f45009ce0d3 in *__GI___libc_free (mem=<optimized out>)
>>>     at malloc.c:3738
>>> #16 0x0000000002821bb9 in llvm::APInt::AssignSlowCase (this=0x7fffa46d55b8,
>>>     RHS=...) at llvm/lib/Support/APInt.cpp:143
>>> #17 0x00000000008f1f59 in llvm::APInt::operator= (this=0x7fffa46d55b8, RHS=...)
>>>     at llvm/include/llvm/ADT/APInt.h:595
>>> #18 0x0000000002401d5e in std::pair<llvm::Value*, llvm::APInt>::operator= (
>>>     this=0x7fffa46d55b0)
>>>     at /usr/lib/gcc/x86_64-linux-gnu/4.4/../../../../include/c++/4.4/bits/stl_pair.h:67
>>> #19 0x0000000002402ae6 in llvm::FlatArrayMap<llvm::Value*,
>>> llvm::APInt, 8u>::insertInternal (this=0x7fffa46d5598, Ptr=0x4767698,
>>>     Val=<error reading variable: Unhandled dwarf expression opcode 0x0>,
>>>     Item=@0x7fffa46d4fd8: 0x4767698)
>>>     at llvm/include/llvm/ADT/FlatArrayMap.h:117
>>> #20 0x0000000002401fed in llvm::FlatArrayMap<llvm::Value*,
>>> llvm::APInt, 8u>::insert (this=0x7fffa46d5598, KV=...)
>>>     at llvm/include/llvm/ADT/FlatArrayMap.h:188
>>> #21 0x0000000002401e57 in
>>> llvm::MultiImplMap<llvm::FlatArrayMap<llvm::Value*, llvm::APInt, 8u>,
>>> llvm::DenseMap<llvm::Value*, llvm::APInt,
>>> llvm::DenseMapInfo<llvm::Value*>  >, 8u, false,
>>> llvm::MultiImplMapIteratorsFactory<llvm::FlatArrayMap<llvm::Value*,
>>> llvm::APInt, 8u>, llvm::DenseMap<llvm::Value*, llvm::APInt,
>>> llvm::DenseMapInfo<llvm::Value*>  >  >  >::insert (this=0x7fffa46d5598,
>>> KV=...)
>>>     at llvm/include/llvm/ADT/MultiImplMap.h:218
>>> #22 0x0000000002400e81 in
>>> llvm::MultiImplMap<llvm::FlatArrayMap<llvm::Value*, llvm::APInt, 8u>,
>>> llvm::DenseMap<llvm::Value*, llvm::APInt,
>>> llvm::DenseMapInfo<llvm::Value*>  >, 8u, false,
>>> llvm::MultiImplMapIteratorsFactory<llvm::FlatArrayMap<llvm::Value*,
>>> llvm::APInt, 8u>, llvm::DenseMap<llvm::Value*, llvm::APInt,
>>> llvm::DenseMapInfo<llvm::Value*>  >  >  >::operator[]
>>> (this=0x7fffa46d5598,
>>>     Key=@0x7fffa46d54a0: 0x4767698)
>>>     at llvm/include/llvm/ADT/MultiImplMap.h:281
>>> #23 0x00000000023f6626 in LinearizeExprTree (I=0x47679b0, Ops=...)
>>>     at llvm/lib/Transforms/Scalar/Reassociate.cpp:589
>>>
>>> I'll throw delta at it and give you a real bug report soon.
>>>
>>> On Tue, Jun 12, 2012 at 7:33 AM, Duncan Sands<baldrick at free.fr>  wrote:
>>>> Author: baldrick
>>>> Date: Tue Jun 12 09:33:56 2012
>>>> New Revision: 158358
>>>>
>>>> URL: http://llvm.org/viewvc/llvm-project?rev=158358&view=rev
>>>> Log:
>>>> Now that Reassociate's LinearizeExprTree can look through arbitrary expression
>>>> topologies, it is quite possible for a leaf node to have huge multiplicity, for
>>>> example: x0 = x*x, x1 = x0*x0, x2 = x1*x1, ... rapidly gives a value which is x
>>>> raised to a vast power (the multiplicity, or weight, of x).  This patch fixes
>>>> the computation of weights by correctly computing them no matter how big they
>>>> are, rather than just overflowing and getting a wrong value.  It turns out that
>>>> the weight for a value never needs more bits to represent than the value itself,
>>>> so it is enough to represent weights as APInts of the same bitwidth and do the
>>>> right overflow-avoiding dance steps when computing weights.  As a side-effect it
>>>> reduces the number of multiplies needed in some cases of large powers.  While
>>>> there, in view of external uses (eg by the vectorizer) I made LinearizeExprTree
>>>> static, pushing the rank computation out into users.  This is progress towards
>>>> fixing PR13021.
>>>>
>>>> Added:
>>>>     llvm/trunk/test/Transforms/Reassociate/repeats.ll
>>>> Modified:
>>>>     llvm/trunk/include/llvm/Constants.h
>>>>     llvm/trunk/include/llvm/Instruction.h
>>>>     llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp
>>>>     llvm/trunk/lib/VMCore/Constants.cpp
>>>>     llvm/trunk/lib/VMCore/Instruction.cpp
>>>>
>>>> Modified: llvm/trunk/include/llvm/Constants.h
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=158358&r1=158357&r2=158358&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/include/llvm/Constants.h (original)
>>>> +++ llvm/trunk/include/llvm/Constants.h Tue Jun 12 09:33:56 2012
>>>> @@ -917,6 +917,11 @@
>>>>      return getLShr(C1, C2, true);
>>>>    }
>>>>
>>>> +  /// getBinOpIdentity - Return the identity for the given binary operation,
>>>> +  /// i.e. a constant C such that X op C = X and C op X = X for every X.  It
>>>> +  /// is an error to call this for an operation that doesn't have an identity.
>>>> +  static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty);
>>>> +
>>>>    /// Transparently provide more efficient getOperand methods.
>>>>    DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
>>>>
>>>>
>>>> Modified: llvm/trunk/include/llvm/Instruction.h
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Instruction.h?rev=158358&r1=158357&r2=158358&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/include/llvm/Instruction.h (original)
>>>> +++ llvm/trunk/include/llvm/Instruction.h Tue Jun 12 09:33:56 2012
>>>> @@ -215,6 +215,27 @@
>>>>    bool isCommutative() const { return isCommutative(getOpcode()); }
>>>>    static bool isCommutative(unsigned op);
>>>>
>>>> +  /// isIdempotent - Return true if the instruction is idempotent:
>>>> +  ///
>>>> +  ///   Idempotent operators satisfy:  x op x === x
>>>> +  ///
>>>> +  /// In LLVM, the And and Or operators are idempotent.
>>>> +  ///
>>>> +  bool isIdempotent() const { return isIdempotent(getOpcode()); }
>>>> +  static bool isIdempotent(unsigned op);
>>>> +
>>>> +  /// isNilpotent - Return true if the instruction is nilpotent:
>>>> +  ///
>>>> +  ///   Nilpotent operators satisfy:  x op x === Id,
>>>> +  ///
>>>> +  ///   where Id is the identity for the operator, i.e. a constant such that
>>>> +  ///     x op Id === x and Id op x === x for all x.
>>>> +  ///
>>>> +  /// In LLVM, the Xor operator is nilpotent.
>>>> +  ///
>>>> +  bool isNilpotent() const { return isNilpotent(getOpcode()); }
>>>> +  static bool isNilpotent(unsigned op);
>>>> +
>>>>    /// mayWriteToMemory - Return true if this instruction may modify memory.
>>>>    ///
>>>>    bool mayWriteToMemory() const;
>>>>
>>>> Modified: llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp?rev=158358&r1=158357&r2=158358&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp (original)
>>>> +++ llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp Tue Jun 12 09:33:56 2012
>>>> @@ -143,7 +143,6 @@
>>>>      Value *buildMinimalMultiplyDAG(IRBuilder<>  &Builder,
>>>>                                     SmallVectorImpl<Factor>  &Factors);
>>>>      Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry>  &Ops);
>>>> -    void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry>  &Ops);
>>>>      Value *RemoveFactorFromExpression(Value *V, Value *Factor);
>>>>      void EraseInst(Instruction *I);
>>>>      void OptimizeInst(Instruction *I);
>>>> @@ -251,10 +250,148 @@
>>>>    return Res;
>>>>   }
>>>>
>>>> +/// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda
>>>> +/// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for
>>>> +/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
>>>> +/// Note that 0<= k<  Bitwidth, and if Bitwidth>  3 then x^(2^k) = 0 for every
>>>> +/// even x in Bitwidth-bit arithmetic.
>>>> +static unsigned CarmichaelShift(unsigned Bitwidth) {
>>>> +  if (Bitwidth<  3)
>>>> +    return Bitwidth - 1;
>>>> +  return Bitwidth - 2;
>>>> +}
>>>> +
>>>> +/// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS',
>>>> +/// reducing the combined weight using any special properties of the operation.
>>>> +/// The existing weight LHS represents the computation X op X op ... op X where
>>>> +/// X occurs LHS times.  The combined weight represents  X op X op ... op X with
>>>> +/// X occurring LHS + RHS times.  If op is "Xor" for example then the combined
>>>> +/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
>>>> +/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
>>>> +static void IncorporateWeight(APInt&LHS, const APInt&RHS, unsigned Opcode) {
>>>> +  // If we were working with infinite precision arithmetic then the combined
>>>> +  // weight would be LHS + RHS.  But we are using finite precision arithmetic,
>>>> +  // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
>>>> +  // for nilpotent operations and addition, but not for idempotent operations
>>>> +  // and multiplication), so it is important to correctly reduce the combined
>>>> +  // weight back into range if wrapping would be wrong.
>>>> +
>>>> +  // If RHS is zero then the weight didn't change.
>>>> +  if (RHS.isMinValue())
>>>> +    return;
>>>> +  // If LHS is zero then the combined weight is RHS.
>>>> +  if (LHS.isMinValue()) {
>>>> +    LHS = RHS;
>>>> +    return;
>>>> +  }
>>>> +  // From this point on we know that neither LHS nor RHS is zero.
>>>> +
>>>> +  if (Instruction::isIdempotent(Opcode)) {
>>>> +    // Idempotent means X op X === X, so any non-zero weight is equivalent to a
>>>> +    // weight of 1.  Keeping weights at zero or one also means that wrapping is
>>>> +    // not a problem.
>>>> +    assert(LHS == 1&&  RHS == 1&&  "Weights not reduced!");
>>>> +    return; // Return a weight of 1.
>>>> +  }
>>>> +  if (Instruction::isNilpotent(Opcode)) {
>>>> +    // Nilpotent means X op X === 0, so reduce weights modulo 2.
>>>> +    assert(LHS == 1&&  RHS == 1&&  "Weights not reduced!");
>>>> +    LHS = 0; // 1 + 1 === 0 modulo 2.
>>>> +    return;
>>>> +  }
>>>> +  if (Opcode == Instruction::Add) {
>>>> +    // TODO: Reduce the weight by exploiting nsw/nuw?
>>>> +    LHS += RHS;
>>>> +    return;
>>>> +  }
>>>> +
>>>> +  assert(Opcode == Instruction::Mul&&  "Unknown associative operation!");
>>>> +  unsigned Bitwidth = LHS.getBitWidth();
>>>> +  // If CM is the Carmichael number then a weight W satisfying W>= CM+Bitwidth
>>>> +  // can be replaced with W-CM.  That's because x^W=x^(W-CM) for every Bitwidth
>>>> +  // bit number x, since either x is odd in which case x^CM = 1, or x is even in
>>>> +  // which case both x^W and x^(W - CM) are zero.  By subtracting off multiples
>>>> +  // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
>>>> +  // which by a happy accident means that they can always be represented using
>>>> +  // Bitwidth bits.
>>>> +  // TODO: Reduce the weight by exploiting nsw/nuw?  (Could do much better than
>>>> +  // the Carmichael number).
>>>> +  if (Bitwidth>  3) {
>>>> +    /// CM - The value of Carmichael's lambda function.
>>>> +    APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
>>>> +    // Any weight W>= Threshold can be replaced with W - CM.
>>>> +    APInt Threshold = CM + Bitwidth;
>>>> +    assert(LHS.ult(Threshold)&&  RHS.ult(Threshold)&&  "Weights not reduced!");
>>>> +    // For Bitwidth 4 or more the following sum does not overflow.
>>>> +    LHS += RHS;
>>>> +    while (LHS.uge(Threshold))
>>>> +      LHS -= CM;
>>>> +  } else {
>>>> +    // To avoid problems with overflow do everything the same as above but using
>>>> +    // a larger type.
>>>> +    unsigned CM = 1U<<  CarmichaelShift(Bitwidth);
>>>> +    unsigned Threshold = CM + Bitwidth;
>>>> +    assert(LHS.getZExtValue()<  Threshold&&  RHS.getZExtValue()<  Threshold&&
>>>> +           "Weights not reduced!");
>>>> +    unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
>>>> +    while (Total>= Threshold)
>>>> +      Total -= CM;
>>>> +    LHS = Total;
>>>> +  }
>>>> +}
>>>> +
>>>> +/// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C
>>>> +/// is repeated Weight times.
>>>> +static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C,
>>>> +                                          APInt Weight) {
>>>> +  // For addition the result can be efficiently computed as the product of the
>>>> +  // constant and the weight.
>>>> +  if (Opcode == Instruction::Add)
>>>> +    return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight));
>>>> +
>>>> +  // The weight might be huge, so compute by repeated squaring to ensure that
>>>> +  // compile time is proportional to the logarithm of the weight.
>>>> +  Constant *Result = 0;
>>>> +  Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc.
>>>> +  // Visit the bits in Weight.
>>>> +  while (Weight != 0) {
>>>> +    // If the current bit in Weight is non-zero do Result = Result op Power.
>>>> +    if (Weight[0])
>>>> +      Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power;
>>>> +    // Move on to the next bit if any more are non-zero.
>>>> +    Weight = Weight.lshr(1);
>>>> +    if (Weight.isMinValue())
>>>> +      break;
>>>> +    // Square the power.
>>>> +    Power = ConstantExpr::get(Opcode, Power, Power);
>>>> +  }
>>>> +
>>>> +  assert(Result&&  "Only positive weights supported!");
>>>> +  return Result;
>>>> +}
>>>> +
>>>> +typedef std::pair<Value*, APInt>  RepeatedValue;
>>>> +
>>>>   /// LinearizeExprTree - Given an associative binary expression, return the leaf
>>>> -/// nodes in Ops.  The original expression is the same as Ops[0] op ... Ops[N].
>>>> -/// Note that a node may occur multiple times in Ops, but if so all occurrences
>>>> -/// are consecutive in the vector.
>>>> +/// nodes in Ops along with their weights (how many times the leaf occurs).  The
>>>> +/// original expression is the same as
>>>> +///   (Ops[0].first op Ops[0].first op ... Ops[0].first)<- Ops[0].second times
>>>> +/// op
>>>> +///   (Ops[1].first op Ops[1].first op ... Ops[1].first)<- Ops[1].second times
>>>> +/// op
>>>> +///   ...
>>>> +/// op
>>>> +///   (Ops[N].first op Ops[N].first op ... Ops[N].first)<- Ops[N].second times
>>>> +///
>>>> +/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and
>>>> +/// they are all non-constant except possibly for the last one, which if it is
>>>> +/// constant will have weight one (Ops[N].second === 1).
>>>> +///
>>>> +/// This routine may modify the function, in which case it returns 'true'.  The
>>>> +/// changes it makes may well be destructive, changing the value computed by 'I'
>>>> +/// to something completely different.  Thus if the routine returns 'true' then
>>>> +/// you MUST either replace I with a new expression computed from the Ops array,
>>>> +/// or use RewriteExprTree to put the values back in.
>>>>   ///
>>>>   /// A leaf node is either not a binary operation of the same kind as the root
>>>>   /// node 'I' (i.e. is not a binary operator at all, or is, but with a different
>>>> @@ -276,7 +413,7 @@
>>>>   ///                   +   *      |      F,  G
>>>>   ///
>>>>   /// The leaf nodes are C, E, F and G.  The Ops array will contain (maybe not in
>>>> -/// that order) C, E, F, F, G, G.
>>>> +/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
>>>>   ///
>>>>   /// The expression is maximal: if some instruction is a binary operator of the
>>>>   /// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
>>>> @@ -287,7 +424,8 @@
>>>>   /// order to ensure that every non-root node in the expression has *exactly one*
>>>>   /// use by a non-leaf node of the expression.  This destruction means that the
>>>>   /// caller MUST either replace 'I' with a new expression or use something like
>>>> -/// RewriteExprTree to put the values back in.
>>>> +/// RewriteExprTree to put the values back in if the routine indicates that it
>>>> +/// made a change by returning 'true'.
>>>>   ///
>>>>   /// In the above example either the right operand of A or the left operand of B
>>>>   /// will be replaced by undef.  If it is B's operand then this gives:
>>>> @@ -310,9 +448,14 @@
>>>>   /// of the expression) if it can turn them into binary operators of the right
>>>>   /// type and thus make the expression bigger.
>>>>
>>>> -void Reassociate::LinearizeExprTree(BinaryOperator *I,
>>>> -                                    SmallVectorImpl<ValueEntry>  &Ops) {
>>>> +static bool LinearizeExprTree(BinaryOperator *I,
>>>> +                              SmallVectorImpl<RepeatedValue>  &Ops) {
>>>>    DEBUG(dbgs()<<  "LINEARIZE: "<<  *I<<  '\n');
>>>> +  unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
>>>> +  unsigned Opcode = I->getOpcode();
>>>> +  assert(Instruction::isAssociative(Opcode)&&
>>>> +         Instruction::isCommutative(Opcode)&&
>>>> +         "Expected an associative and commutative operation!");
>>>>
>>>>    // Visit all operands of the expression, keeping track of their weight (the
>>>>    // number of paths from the expression root to the operand, or if you like
>>>> @@ -324,9 +467,9 @@
>>>>    // with their weights, representing a certain number of paths to the operator.
>>>>    // If an operator occurs in the worklist multiple times then we found multiple
>>>>    // ways to get to it.
>>>> -  SmallVector<std::pair<BinaryOperator*, unsigned>, 8>  Worklist; // (Op, Weight)
>>>> -  Worklist.push_back(std::make_pair(I, 1));
>>>> -  unsigned Opcode = I->getOpcode();
>>>> +  SmallVector<std::pair<BinaryOperator*, APInt>, 8>  Worklist; // (Op, Weight)
>>>> +  Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
>>>> +  bool MadeChange = false;
>>>>
>>>>    // Leaves of the expression are values that either aren't the right kind of
>>>>    // operation (eg: a constant, or a multiply in an add tree), or are, but have
>>>> @@ -343,7 +486,7 @@
>>>>
>>>>    // Leaves - Keeps track of the set of putative leaves as well as the number of
>>>>    // paths to each leaf seen so far.
>>>> -  typedef SmallMap<Value*, unsigned, 8>  LeafMap;
>>>> +  typedef SmallMap<Value*, APInt, 8>  LeafMap;
>>>>    LeafMap Leaves; // Leaf ->  Total weight so far.
>>>>    SmallVector<Value*, 8>  LeafOrder; // Ensure deterministic leaf output order.
>>>>
>>>> @@ -351,13 +494,12 @@
>>>>    SmallPtrSet<Value*, 8>  Visited; // For sanity checking the iteration scheme.
>>>>   #endif
>>>>    while (!Worklist.empty()) {
>>>> -    std::pair<BinaryOperator*, unsigned>  P = Worklist.pop_back_val();
>>>> +    std::pair<BinaryOperator*, APInt>  P = Worklist.pop_back_val();
>>>>      I = P.first; // We examine the operands of this binary operator.
>>>> -    assert(P.second>= 1&&  "No paths to here, so how did we get here?!");
>>>>
>>>>      for (unsigned OpIdx = 0; OpIdx<  2; ++OpIdx) { // Visit operands.
>>>>        Value *Op = I->getOperand(OpIdx);
>>>> -      unsigned Weight = P.second; // Number of paths to this operand.
>>>> +      APInt Weight = P.second; // Number of paths to this operand.
>>>>        DEBUG(dbgs()<<  "OPERAND: "<<  *Op<<  " ("<<  Weight<<  ")\n");
>>>>        assert(!Op->use_empty()&&  "No uses, so how did we get to it?!");
>>>>
>>>> @@ -389,7 +531,7 @@
>>>>          assert(Visited.count(Op)&&  "In leaf map but not visited!");
>>>>
>>>>          // Update the number of paths to the leaf.
>>>> -        It->second += Weight;
>>>> +        IncorporateWeight(It->second, Weight, Opcode);
>>>>
>>>>          // The leaf already has one use from inside the expression.  As we want
>>>>          // exactly one such use, drop this new use of the leaf.
>>>> @@ -450,21 +592,44 @@
>>>>
>>>>    // The leaves, repeated according to their weights, represent the linearized
>>>>    // form of the expression.
>>>> +  Constant *Cst = 0; // Accumulate constants here.
>>>>    for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
>>>>      Value *V = LeafOrder[i];
>>>>      LeafMap::iterator It = Leaves.find(V);
>>>>      if (It == Leaves.end())
>>>> -      // Leaf already output, or node initially thought to be a leaf wasn't.
>>>> +      // Node initially thought to be a leaf wasn't.
>>>>        continue;
>>>>      assert(!isReassociableOp(V, Opcode)&&  "Shouldn't be a leaf!");
>>>> -    unsigned Weight = It->second;
>>>> -    assert(Weight>  0&&  "No paths to this value!");
>>>> -    // FIXME: Rather than repeating values Weight times, use a vector of
>>>> -    // (ValueEntry, multiplicity) pairs.
>>>> -    Ops.append(Weight, ValueEntry(getRank(V), V));
>>>> +    APInt Weight = It->second;
>>>> +    if (Weight.isMinValue())
>>>> +      // Leaf already output or weight reduction eliminated it.
>>>> +      continue;
>>>>      // Ensure the leaf is only output once.
>>>> -    Leaves.erase(It);
>>>> +    It->second = 0;
>>>> +    // Glob all constants together into Cst.
>>>> +    if (Constant *C = dyn_cast<Constant>(V)) {
>>>> +      C = EvaluateRepeatedConstant(Opcode, C, Weight);
>>>> +      Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C;
>>>> +      continue;
>>>> +    }
>>>> +    // Add non-constant
>>>> +    Ops.push_back(std::make_pair(V, Weight));
>>>> +  }
>>>> +
>>>> +  // Add any constants back into Ops, all globbed together and reduced to having
>>>> +  // weight 1 for the convenience of users.
>>>> +  if (Cst&&  Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType()))
>>>> +    Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1)));
>>>> +
>>>> +  // For nilpotent operations or addition there may be no operands, for example
>>>> +  // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
>>>> +  // in both cases the weight reduces to 0 causing the value to be skipped.
>>>> +  if (Ops.empty()) {
>>>> +    Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
>>>> +    Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
>>>>    }
>>>> +
>>>> +  return MadeChange;
>>>>   }
>>>>
>>>>   // RewriteExprTree - Now that the operands for this expression tree are
>>>> @@ -775,8 +940,15 @@
>>>>    BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
>>>>    if (!BO) return 0;
>>>>
>>>> +  SmallVector<RepeatedValue, 8>  Tree;
>>>> +  MadeChange |= LinearizeExprTree(BO, Tree);
>>>>    SmallVector<ValueEntry, 8>  Factors;
>>>> -  LinearizeExprTree(BO, Factors);
>>>> +  Factors.reserve(Tree.size());
>>>> +  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
>>>> +    RepeatedValue E = Tree[i];
>>>> +    Factors.append(E.second.getZExtValue(),
>>>> +                   ValueEntry(getRank(E.first), E.first));
>>>> +  }
>>>>
>>>>    bool FoundFactor = false;
>>>>    bool NeedsNegate = false;
>>>> @@ -1439,8 +1611,15 @@
>>>>
>>>>    // First, walk the expression tree, linearizing the tree, collecting the
>>>>    // operand information.
>>>> +  SmallVector<RepeatedValue, 8>  Tree;
>>>> +  MadeChange |= LinearizeExprTree(I, Tree);
>>>>    SmallVector<ValueEntry, 8>  Ops;
>>>> -  LinearizeExprTree(I, Ops);
>>>> +  Ops.reserve(Tree.size());
>>>> +  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
>>>> +    RepeatedValue E = Tree[i];
>>>> +    Ops.append(E.second.getZExtValue(),
>>>> +               ValueEntry(getRank(E.first), E.first));
>>>> +  }
>>>>
>>>>    DEBUG(dbgs()<<  "RAIn:\t"; PrintOps(I, Ops); dbgs()<<  '\n');
>>>>
>>>>
>>>> Modified: llvm/trunk/lib/VMCore/Constants.cpp
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=158358&r1=158357&r2=158358&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/lib/VMCore/Constants.cpp (original)
>>>> +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jun 12 09:33:56 2012
>>>> @@ -2007,6 +2007,26 @@
>>>>               isExact ? PossiblyExactOperator::IsExact : 0);
>>>>   }
>>>>
>>>> +/// getBinOpIdentity - Return the identity for the given binary operation,
>>>> +/// i.e. a constant C such that X op C = X and C op X = X for every X.  It
>>>> +/// is an error to call this for an operation that doesn't have an identity.
>>>> +Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) {
>>>> +  switch (Opcode) {
>>>> +  default:
>>>> +    llvm_unreachable("Not a binary operation with identity");
>>>> +  case Instruction::Add:
>>>> +  case Instruction::Or:
>>>> +  case Instruction::Xor:
>>>> +    return Constant::getNullValue(Ty);
>>>> +
>>>> +  case Instruction::Mul:
>>>> +    return ConstantInt::get(Ty, 1);
>>>> +
>>>> +  case Instruction::And:
>>>> +    return Constant::getAllOnesValue(Ty);
>>>> +  }
>>>> +}
>>>> +
>>>>   // destroyConstant - Remove the constant from the constant table...
>>>>   //
>>>>   void ConstantExpr::destroyConstant() {
>>>>
>>>> Modified: llvm/trunk/lib/VMCore/Instruction.cpp
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Instruction.cpp?rev=158358&r1=158357&r2=158358&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/lib/VMCore/Instruction.cpp (original)
>>>> +++ llvm/trunk/lib/VMCore/Instruction.cpp Tue Jun 12 09:33:56 2012
>>>> @@ -395,6 +395,29 @@
>>>>    }
>>>>   }
>>>>
>>>> +/// isIdempotent - Return true if the instruction is idempotent:
>>>> +///
>>>> +///   Idempotent operators satisfy:  x op x === x
>>>> +///
>>>> +/// In LLVM, the And and Or operators are idempotent.
>>>> +///
>>>> +bool Instruction::isIdempotent(unsigned Opcode) {
>>>> +  return Opcode == And || Opcode == Or;
>>>> +}
>>>> +
>>>> +/// isNilpotent - Return true if the instruction is nilpotent:
>>>> +///
>>>> +///   Nilpotent operators satisfy:  x op x === Id,
>>>> +///
>>>> +///   where Id is the identity for the operator, i.e. a constant such that
>>>> +///     x op Id === x and Id op x === x for all x.
>>>> +///
>>>> +/// In LLVM, the Xor operator is nilpotent.
>>>> +///
>>>> +bool Instruction::isNilpotent(unsigned Opcode) {
>>>> +  return Opcode == Xor;
>>>> +}
>>>> +
>>>>   Instruction *Instruction::clone() const {
>>>>    Instruction *New = clone_impl();
>>>>    New->SubclassOptionalData = SubclassOptionalData;
>>>>
>>>> Added: llvm/trunk/test/Transforms/Reassociate/repeats.ll
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Reassociate/repeats.ll?rev=158358&view=auto
>>>> ==============================================================================
>>>> --- llvm/trunk/test/Transforms/Reassociate/repeats.ll (added)
>>>> +++ llvm/trunk/test/Transforms/Reassociate/repeats.ll Tue Jun 12 09:33:56 2012
>>>> @@ -0,0 +1,252 @@
>>>> +; RUN: opt < %s -reassociate -S | FileCheck %s
>>>> +
>>>> +; Tests involving repeated operations on the same value.
>>>> +
>>>> +define i8 @nilpotent(i8 %x) {
>>>> +; CHECK: @nilpotent
>>>> +  %tmp = xor i8 %x, %x
>>>> +  ret i8 %tmp
>>>> +; CHECK: ret i8 0
>>>> +}
>>>> +
>>>> +define i2 @idempotent(i2 %x) {
>>>> +; CHECK: @idempotent
>>>> +  %tmp1 = and i2 %x, %x
>>>> +  %tmp2 = and i2 %tmp1, %x
>>>> +  %tmp3 = and i2 %tmp2, %x
>>>> +  ret i2 %tmp3
>>>> +; CHECK: ret i2 %x
>>>> +}
>>>> +
>>>> +define i2 @add(i2 %x) {
>>>> +; CHECK: @add
>>>> +  %tmp1 = add i2 %x, %x
>>>> +  %tmp2 = add i2 %tmp1, %x
>>>> +  %tmp3 = add i2 %tmp2, %x
>>>> +  ret i2 %tmp3
>>>> +; CHECK: ret i2 0
>>>> +}
>>>> +
>>>> +define i2 @cst_add() {
>>>> +; CHECK: @cst_add
>>>> +  %tmp1 = add i2 1, 1
>>>> +  %tmp2 = add i2 %tmp1, 1
>>>> +  ret i2 %tmp2
>>>> +; CHECK: ret i2 -1
>>>> +}
>>>> +
>>>> +define i8 @cst_mul() {
>>>> +; CHECK: @cst_mul
>>>> +  %tmp1 = mul i8 3, 3
>>>> +  %tmp2 = mul i8 %tmp1, 3
>>>> +  %tmp3 = mul i8 %tmp2, 3
>>>> +  %tmp4 = mul i8 %tmp3, 3
>>>> +  ret i8 %tmp4
>>>> +; CHECK: ret i8 -13
>>>> +}
>>>> +
>>>> +define i3 @foo3x5(i3 %x) {
>>>> +; Can be done with two multiplies.
>>>> +; CHECK: @foo3x5
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i3 %x, %x
>>>> +  %tmp2 = mul i3 %tmp1, %x
>>>> +  %tmp3 = mul i3 %tmp2, %x
>>>> +  %tmp4 = mul i3 %tmp3, %x
>>>> +  ret i3 %tmp4
>>>> +}
>>>> +
>>>> +define i3 @foo3x6(i3 %x) {
>>>> +; Can be done with two multiplies.
>>>> +; CHECK: @foo3x6
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i3 %x, %x
>>>> +  %tmp2 = mul i3 %tmp1, %x
>>>> +  %tmp3 = mul i3 %tmp2, %x
>>>> +  %tmp4 = mul i3 %tmp3, %x
>>>> +  %tmp5 = mul i3 %tmp4, %x
>>>> +  ret i3 %tmp5
>>>> +}
>>>> +
>>>> +define i3 @foo3x7(i3 %x) {
>>>> +; Can be done with two multiplies.
>>>> +; CHECK: @foo3x7
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i3 %x, %x
>>>> +  %tmp2 = mul i3 %tmp1, %x
>>>> +  %tmp3 = mul i3 %tmp2, %x
>>>> +  %tmp4 = mul i3 %tmp3, %x
>>>> +  %tmp5 = mul i3 %tmp4, %x
>>>> +  %tmp6 = mul i3 %tmp5, %x
>>>> +  ret i3 %tmp6
>>>> +}
>>>> +
>>>> +define i4 @foo4x8(i4 %x) {
>>>> +; Can be done with two multiplies.
>>>> +; CHECK: @foo4x8
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i4 %x, %x
>>>> +  %tmp2 = mul i4 %tmp1, %x
>>>> +  %tmp3 = mul i4 %tmp2, %x
>>>> +  %tmp4 = mul i4 %tmp3, %x
>>>> +  %tmp5 = mul i4 %tmp4, %x
>>>> +  %tmp6 = mul i4 %tmp5, %x
>>>> +  %tmp7 = mul i4 %tmp6, %x
>>>> +  ret i4 %tmp7
>>>> +}
>>>> +
>>>> +define i4 @foo4x9(i4 %x) {
>>>> +; Can be done with three multiplies.
>>>> +; CHECK: @foo4x9
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i4 %x, %x
>>>> +  %tmp2 = mul i4 %tmp1, %x
>>>> +  %tmp3 = mul i4 %tmp2, %x
>>>> +  %tmp4 = mul i4 %tmp3, %x
>>>> +  %tmp5 = mul i4 %tmp4, %x
>>>> +  %tmp6 = mul i4 %tmp5, %x
>>>> +  %tmp7 = mul i4 %tmp6, %x
>>>> +  %tmp8 = mul i4 %tmp7, %x
>>>> +  ret i4 %tmp8
>>>> +}
>>>> +
>>>> +define i4 @foo4x10(i4 %x) {
>>>> +; Can be done with three multiplies.
>>>> +; CHECK: @foo4x10
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i4 %x, %x
>>>> +  %tmp2 = mul i4 %tmp1, %x
>>>> +  %tmp3 = mul i4 %tmp2, %x
>>>> +  %tmp4 = mul i4 %tmp3, %x
>>>> +  %tmp5 = mul i4 %tmp4, %x
>>>> +  %tmp6 = mul i4 %tmp5, %x
>>>> +  %tmp7 = mul i4 %tmp6, %x
>>>> +  %tmp8 = mul i4 %tmp7, %x
>>>> +  %tmp9 = mul i4 %tmp8, %x
>>>> +  ret i4 %tmp9
>>>> +}
>>>> +
>>>> +define i4 @foo4x11(i4 %x) {
>>>> +; Can be done with four multiplies.
>>>> +; CHECK: @foo4x11
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i4 %x, %x
>>>> +  %tmp2 = mul i4 %tmp1, %x
>>>> +  %tmp3 = mul i4 %tmp2, %x
>>>> +  %tmp4 = mul i4 %tmp3, %x
>>>> +  %tmp5 = mul i4 %tmp4, %x
>>>> +  %tmp6 = mul i4 %tmp5, %x
>>>> +  %tmp7 = mul i4 %tmp6, %x
>>>> +  %tmp8 = mul i4 %tmp7, %x
>>>> +  %tmp9 = mul i4 %tmp8, %x
>>>> +  %tmp10 = mul i4 %tmp9, %x
>>>> +  ret i4 %tmp10
>>>> +}
>>>> +
>>>> +define i4 @foo4x12(i4 %x) {
>>>> +; Can be done with two multiplies.
>>>> +; CHECK: @foo4x12
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i4 %x, %x
>>>> +  %tmp2 = mul i4 %tmp1, %x
>>>> +  %tmp3 = mul i4 %tmp2, %x
>>>> +  %tmp4 = mul i4 %tmp3, %x
>>>> +  %tmp5 = mul i4 %tmp4, %x
>>>> +  %tmp6 = mul i4 %tmp5, %x
>>>> +  %tmp7 = mul i4 %tmp6, %x
>>>> +  %tmp8 = mul i4 %tmp7, %x
>>>> +  %tmp9 = mul i4 %tmp8, %x
>>>> +  %tmp10 = mul i4 %tmp9, %x
>>>> +  %tmp11 = mul i4 %tmp10, %x
>>>> +  ret i4 %tmp11
>>>> +}
>>>> +
>>>> +define i4 @foo4x13(i4 %x) {
>>>> +; Can be done with three multiplies.
>>>> +; CHECK: @foo4x13
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i4 %x, %x
>>>> +  %tmp2 = mul i4 %tmp1, %x
>>>> +  %tmp3 = mul i4 %tmp2, %x
>>>> +  %tmp4 = mul i4 %tmp3, %x
>>>> +  %tmp5 = mul i4 %tmp4, %x
>>>> +  %tmp6 = mul i4 %tmp5, %x
>>>> +  %tmp7 = mul i4 %tmp6, %x
>>>> +  %tmp8 = mul i4 %tmp7, %x
>>>> +  %tmp9 = mul i4 %tmp8, %x
>>>> +  %tmp10 = mul i4 %tmp9, %x
>>>> +  %tmp11 = mul i4 %tmp10, %x
>>>> +  %tmp12 = mul i4 %tmp11, %x
>>>> +  ret i4 %tmp12
>>>> +}
>>>> +
>>>> +define i4 @foo4x14(i4 %x) {
>>>> +; Can be done with three multiplies.
>>>> +; CHECK: @foo4x14
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i4 %x, %x
>>>> +  %tmp2 = mul i4 %tmp1, %x
>>>> +  %tmp3 = mul i4 %tmp2, %x
>>>> +  %tmp4 = mul i4 %tmp3, %x
>>>> +  %tmp5 = mul i4 %tmp4, %x
>>>> +  %tmp6 = mul i4 %tmp5, %x
>>>> +  %tmp7 = mul i4 %tmp6, %x
>>>> +  %tmp8 = mul i4 %tmp7, %x
>>>> +  %tmp9 = mul i4 %tmp8, %x
>>>> +  %tmp10 = mul i4 %tmp9, %x
>>>> +  %tmp11 = mul i4 %tmp10, %x
>>>> +  %tmp12 = mul i4 %tmp11, %x
>>>> +  %tmp13 = mul i4 %tmp12, %x
>>>> +  ret i4 %tmp13
>>>> +}
>>>> +
>>>> +define i4 @foo4x15(i4 %x) {
>>>> +; Can be done with four multiplies.
>>>> +; CHECK: @foo4x15
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: mul
>>>> +; CHECK-NEXT: ret
>>>> +  %tmp1 = mul i4 %x, %x
>>>> +  %tmp2 = mul i4 %tmp1, %x
>>>> +  %tmp3 = mul i4 %tmp2, %x
>>>> +  %tmp4 = mul i4 %tmp3, %x
>>>> +  %tmp5 = mul i4 %tmp4, %x
>>>> +  %tmp6 = mul i4 %tmp5, %x
>>>> +  %tmp7 = mul i4 %tmp6, %x
>>>> +  %tmp8 = mul i4 %tmp7, %x
>>>> +  %tmp9 = mul i4 %tmp8, %x
>>>> +  %tmp10 = mul i4 %tmp9, %x
>>>> +  %tmp11 = mul i4 %tmp10, %x
>>>> +  %tmp12 = mul i4 %tmp11, %x
>>>> +  %tmp13 = mul i4 %tmp12, %x
>>>> +  %tmp14 = mul i4 %tmp13, %x
>>>> +  ret i4 %tmp14
>>>> +}
>>>>
>>>>
>>>> _______________________________________________
>>>> llvm-commits mailing list
>>>> llvm-commits at cs.uiuc.edu
>>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list