[llvm] r183005 - Simplify multiplications by vectors whose elements are powers of 2.

Fri May 31 07:27:15 PDT 2013

Author: rafael
Date: Fri May 31 09:27:15 2013
New Revision: 183005

URL: http://llvm.org/viewvc/llvm-project?rev=183005&view=rev
Log:
Simplify multiplications by vectors whose elements are powers of 2.

Patch by Andrea Di Biagio.

Added:
    llvm/trunk/test/Transforms/InstCombine/vector-mul.ll
Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp?rev=183005&r1=183004&r2=183005&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Fri May 31 09:27:15 2013
@@ -95,6 +95,25 @@ static bool MultiplyOverflows(ConstantIn
   return MulExt.slt(Min) || MulExt.sgt(Max);
 }
 
+/// \brief A helper routine of InstCombiner::visitMul().
+///
+/// If every element of CV is a known power of 2, this function returns a
+/// new vector obtained from CV by replacing each element with its logBase2.
+/// Returns a null pointer otherwise.
+static Constant *getLogBase2Vector(ConstantDataVector *CV) {
+  const APInt *IVal;
+  SmallVector<Constant *, 4> Elts;
+
+  for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) {
+    Constant *Elt = CV->getElementAsConstant(I);
+    if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
+      return 0; // One non-power-of-2 element rejects the whole vector.
+    Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2()));
+  }
+
+  return ConstantVector::get(Elts);
+}
+
 Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -108,24 +127,37 @@ Instruction *InstCombiner::visitMul(Bina
   if (match(Op1, m_AllOnes()))  // X * -1 == 0 - X
     return BinaryOperator::CreateNeg(Op0, I.getName());
 
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-
-    // ((X << C1)*C2) == (X * (C2 << C1))
-    if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
-      if (SI->getOpcode() == Instruction::Shl)
-        if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
-          return BinaryOperator::CreateMul(SI->getOperand(0),
-                                           ConstantExpr::getShl(CI, ShOp));
-
-    const APInt &Val = CI->getValue();
-    if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
-      Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
-      BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
-      if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
-      if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
-      return Shl;
+  // Also allow combining multiply instructions on vectors.
+  {
+    Value *NewOp;
+    Constant *C1, *C2;
+    const APInt *IVal;
+    if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_Constant(C2)),
+                        m_Constant(C1))) &&
+        match(C1, m_APInt(IVal)))
+      // ((X << C2)*C1) == (X * (C1 << C2))
+      return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2));
+
+    if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
+      Constant *NewCst = 0;
+      if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2())
+        // Replace X*(2^C) with X << C, where C is either a scalar or a splat.
+        NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2());
+      else if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(C1))
+        // Replace X*(2^C) with X << C, where C is a vector of known
+        // constant powers of 2.
+        NewCst = getLogBase2Vector(CV);
+
+      if (NewCst) {
+        BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
+        if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+        if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+        return Shl;
+      }
     }
+  }
 
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
     // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
     { Value *X; ConstantInt *C1;
       if (Op0->hasOneUse() &&

Added: llvm/trunk/test/Transforms/InstCombine/vector-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vector-mul.ll?rev=183005&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vector-mul.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/vector-mul.ll Fri May 31 09:27:15 2013
@@ -0,0 +1,408 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check that instcombine rewrites a multiply by a vector of known
+; constant power-of-2 elements as a vector shift.
+
+define <4 x i8> @Zero_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 0, i8 0, i8 0, i8 0>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @Zero_i8
+; CHECK: ret <4 x i8> zeroinitializer
+
+define <4 x i8> @Identity_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 1, i8 1, i8 1, i8 1>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @Identity_i8
+; CHECK: ret <4 x i8> %InVec
+
+define <4 x i8> @AddToSelf_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @AddToSelf_i8
+; CHECK: shl <4 x i8> %InVec, <i8 1, i8 1, i8 1, i8 1>
+; CHECK: ret
+
+define <4 x i8> @SplatPow2Test1_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 4, i8 4>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @SplatPow2Test1_i8
+; CHECK: shl <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2>
+; CHECK: ret
+
+define <4 x i8> @SplatPow2Test2_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 8, i8 8, i8 8, i8 8>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @SplatPow2Test2_i8
+; CHECK: shl <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3>
+; CHECK: ret
+
+define <4 x i8> @MulTest1_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 1, i8 2, i8 4, i8 8>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @MulTest1_i8
+; CHECK: shl <4 x i8> %InVec, <i8 0, i8 1, i8 2, i8 3>
+; CHECK: ret
+
+define <4 x i8> @MulTest2_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @MulTest2_i8
+; CHECK: mul <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3>
+; CHECK: ret
+
+define <4 x i8> @MulTest3_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 2, i8 2>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @MulTest3_i8
+; CHECK: shl <4 x i8> %InVec, <i8 2, i8 2, i8 1, i8 1>
+; CHECK: ret
+
+
+define <4 x i8> @MulTest4_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 0, i8 1>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @MulTest4_i8
+; CHECK: mul <4 x i8> %InVec, <i8 4, i8 4, i8 0, i8 1>
+; CHECK: ret
+
+define <4 x i16> @Zero_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @Zero_i16
+; CHECK: ret <4 x i16> zeroinitializer
+
+define <4 x i16> @Identity_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @Identity_i16
+; CHECK: ret <4 x i16> %InVec
+
+define <4 x i16> @AddToSelf_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @AddToSelf_i16
+; CHECK: shl <4 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1>
+; CHECK: ret
+
+define <4 x i16> @SplatPow2Test1_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 4, i16 4>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @SplatPow2Test1_i16
+; CHECK: shl <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2>
+; CHECK: ret
+
+define <4 x i16> @SplatPow2Test2_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 8, i16 8, i16 8, i16 8>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @SplatPow2Test2_i16
+; CHECK: shl <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3>
+; CHECK: ret
+
+define <4 x i16> @MulTest1_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 1, i16 2, i16 4, i16 8>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @MulTest1_i16
+; CHECK: shl <4 x i16> %InVec, <i16 0, i16 1, i16 2, i16 3>
+; CHECK: ret
+
+define <4 x i16> @MulTest2_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @MulTest2_i16
+; CHECK: mul <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3>
+; CHECK: ret
+
+define <4 x i16> @MulTest3_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 2, i16 2>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @MulTest3_i16
+; CHECK: shl <4 x i16> %InVec, <i16 2, i16 2, i16 1, i16 1>
+; CHECK: ret
+
+define <4 x i16> @MulTest4_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 0, i16 2>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @MulTest4_i16
+; CHECK: mul <4 x i16> %InVec, <i16 4, i16 4, i16 0, i16 2>
+; CHECK: ret
+
+define <4 x i32> @Zero_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @Zero_i32
+; CHECK: ret <4 x i32> zeroinitializer
+
+define <4 x i32> @Identity_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @Identity_i32
+; CHECK: ret <4 x i32> %InVec
+
+define <4 x i32> @AddToSelf_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @AddToSelf_i32
+; CHECK: shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+; CHECK: ret
+
+
+define <4 x i32> @SplatPow2Test1_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @SplatPow2Test1_i32
+; CHECK: shl <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2>
+; CHECK: ret
+
+define <4 x i32> @SplatPow2Test2_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @SplatPow2Test2_i32
+; CHECK: shl <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3>
+; CHECK: ret
+
+define <4 x i32> @MulTest1_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 1, i32 2, i32 4, i32 8>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @MulTest1_i32
+; CHECK: shl <4 x i32> %InVec, <i32 0, i32 1, i32 2, i32 3>
+; CHECK: ret
+
+define <4 x i32> @MulTest2_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @MulTest2_i32
+; CHECK: mul <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3>
+; CHECK: ret
+
+define <4 x i32> @MulTest3_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 2, i32 2>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @MulTest3_i32
+; CHECK: shl <4 x i32> %InVec, <i32 2, i32 2, i32 1, i32 1>
+; CHECK: ret
+
+
+define <4 x i32> @MulTest4_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 0, i32 1>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @MulTest4_i32
+; CHECK: mul <4 x i32> %InVec, <i32 4, i32 4, i32 0, i32 1>
+; CHECK: ret
+
+define <4 x i64> @Zero_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @Zero_i64
+; CHECK: ret <4 x i64> zeroinitializer
+
+define <4 x i64> @Identity_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @Identity_i64
+; CHECK: ret <4 x i64> %InVec
+
+define <4 x i64> @AddToSelf_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @AddToSelf_i64
+; CHECK: shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+; CHECK: ret
+
+define <4 x i64> @SplatPow2Test1_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 4, i64 4>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @SplatPow2Test1_i64
+; CHECK: shl <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2>
+; CHECK: ret
+
+define <4 x i64> @SplatPow2Test2_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 8, i64 8, i64 8, i64 8>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @SplatPow2Test2_i64
+; CHECK: shl <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3>
+; CHECK: ret
+
+define <4 x i64> @MulTest1_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 1, i64 2, i64 4, i64 8>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @MulTest1_i64
+; CHECK: shl <4 x i64> %InVec, <i64 0, i64 1, i64 2, i64 3>
+; CHECK: ret
+
+define <4 x i64> @MulTest2_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @MulTest2_i64
+; CHECK: mul <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3>
+; CHECK: ret
+
+define <4 x i64> @MulTest3_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 2, i64 2>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @MulTest3_i64
+; CHECK: shl <4 x i64> %InVec, <i64 2, i64 2, i64 1, i64 1>
+; CHECK: ret
+
+define <4 x i64> @MulTest4_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 0, i64 1>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @MulTest4_i64
+; CHECK: mul <4 x i64> %InVec, <i64 4, i64 4, i64 0, i64 1>
+; CHECK: ret
+
+; Also test that the following rewriting rule works with vectors
+; of integers:
+;   ((X << C1)*C2) == (X * (C2 << C1))
+
+define <4 x i8> @ShiftMulTest1(<4 x i8> %InVec) {
+entry:
+  %shl = shl <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2>
+  %mul = mul <4 x i8> %shl, <i8 3, i8 3, i8 3, i8 3>
+  ret <4 x i8> %mul
+}
+
+; CHECK: @ShiftMulTest1
+; CHECK: mul <4 x i8> %InVec, <i8 12, i8 12, i8 12, i8 12>
+; CHECK: ret
+
+define <4 x i16> @ShiftMulTest2(<4 x i16> %InVec) {
+entry:
+  %shl = shl <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2>
+  %mul = mul <4 x i16> %shl, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %mul
+}
+
+; CHECK: @ShiftMulTest2
+; CHECK: mul <4 x i16> %InVec, <i16 12, i16 12, i16 12, i16 12>
+; CHECK: ret
+
+define <4 x i32> @ShiftMulTest3(<4 x i32> %InVec) {
+entry:
+  %shl = shl <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2>
+  %mul = mul <4 x i32> %shl, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %mul
+}
+
+; CHECK: @ShiftMulTest3
+; CHECK: mul <4 x i32> %InVec, <i32 12, i32 12, i32 12, i32 12>
+; CHECK: ret
+
+define <4 x i64> @ShiftMulTest4(<4 x i64> %InVec) {
+entry:
+  %shl = shl <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2>
+  %mul = mul <4 x i64> %shl, <i64 3, i64 3, i64 3, i64 3>
+  ret <4 x i64> %mul
+}
+
+; CHECK: @ShiftMulTest4
+; CHECK: mul <4 x i64> %InVec, <i64 12, i64 12, i64 12, i64 12>
+; CHECK: ret
+