[llvm] r216371 - Allow vectorization of division by uniform power of 2.

Sun Aug 24 21:56:55 PDT 2014

Author: karthik
Date: Sun Aug 24 23:56:54 2014
New Revision: 216371

URL: http://llvm.org/viewvc/llvm-project?rev=216371&view=rev
Log:
Allow vectorization of division by uniform power of 2.

This patch adds support to recognize division by uniform power of 2 and modifies the cost table to vectorize division by uniform power of 2 whenever possible.
Updates Cost model for Loop and SLP Vectorizer.The cost table is currently only updated for X86 backend.
Thanks to Hal, Andrea, Sanjay for the review. (http://reviews.llvm.org/D4971)


Added:
    llvm/trunk/test/Transforms/LoopVectorize/X86/powof2div.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/powof2div.ll
Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
    llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp
    llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Sun Aug 24 23:56:54 2014
@@ -335,6 +335,9 @@ public:
     OK_NonUniformConstantValue   // Operand is a non uniform constant value.
   };
 
+  /// \brief Additional properties of an operand's values.
+  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
+
   /// \return The number of scalar or vector registers that the target has.
   /// If 'Vectors' is true, it returns the number of vector registers. If it is
   /// set to false, it returns the number of scalar registers.
@@ -349,9 +352,12 @@ public:
   virtual unsigned getMaximumUnrollFactor() const;
 
   /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
-  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                  OperandValueKind Opd1Info = OK_AnyValue,
-                                  OperandValueKind Opd2Info = OK_AnyValue) const;
+  virtual unsigned
+  getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                         OperandValueKind Opd1Info = OK_AnyValue,
+                         OperandValueKind Opd2Info = OK_AnyValue,
+                         OperandValueProperties Opd1PropInfo = OP_None,
+                         OperandValueProperties Opd2PropInfo = OP_None) const;
 
   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
   /// The index and subtype parameters are used by the subvector insertion and

Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Sun Aug 24 23:56:54 2014
@@ -171,11 +171,12 @@ unsigned TargetTransformInfo::getMaximum
   return PrevTTI->getMaximumUnrollFactor();
 }
 
-unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
-                                                Type *Ty,
-                                                OperandValueKind Op1Info,
-                                                OperandValueKind Op2Info) const {
-  return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+unsigned TargetTransformInfo::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+    OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+    OperandValueProperties Opd2PropInfo) const {
+  return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+                                         Opd1PropInfo, Opd2PropInfo);
 }
 
 unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
@@ -569,7 +570,8 @@ struct NoTTI final : ImmutablePass, Targ
   }
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
-                                  OperandValueKind) const override {
+                                  OperandValueKind, OperandValueProperties,
+                                  OperandValueProperties) const override {
     return 1;
   }
 

Modified: llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp Sun Aug 24 23:56:54 2014
@@ -104,7 +104,8 @@ public:
   unsigned getMaximumUnrollFactor() const override;
   unsigned getRegisterBitWidth(bool Vector) const override;
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
-                                  OperandValueKind) const override;
+                                  OperandValueKind, OperandValueProperties,
+                                  OperandValueProperties) const override;
   unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                           int Index, Type *SubTp) const override;
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -289,8 +290,9 @@ unsigned BasicTTI::getMaximumUnrollFacto
 }
 
 unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                          OperandValueKind,
-                                          OperandValueKind) const {
+                                          OperandValueKind, OperandValueKind,
+                                          OperandValueProperties,
+                                          OperandValueProperties) const {
   // Check if any of the operands are vector operands.
   const TargetLoweringBase *TLI = getTLI();
   int ISD = TLI->InstructionOpcodeToISD(Opcode);

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Sun Aug 24 23:56:54 2014
@@ -112,10 +112,11 @@ public:
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
       override;
 
-  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                  OperandValueKind Opd1Info = OK_AnyValue,
-                                  OperandValueKind Opd2Info = OK_AnyValue) const
-      override;
+  unsigned getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+      OperandValueKind Opd2Info = OK_AnyValue,
+      OperandValueProperties Opd1PropInfo = OP_None,
+      OperandValueProperties Opd2PropInfo = OP_None) const override;
 
   unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
 
@@ -403,9 +404,10 @@ unsigned AArch64TTI::getVectorInstrCost(
   return 2;
 }
 
-unsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                          OperandValueKind Opd1Info,
-                                          OperandValueKind Opd2Info) const {
+unsigned AArch64TTI::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+    OperandValueProperties Opd2PropInfo) const {
   // Legalize the type.
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
 
@@ -413,8 +415,8 @@ unsigned AArch64TTI::getArithmeticInstrC
 
   switch (ISD) {
   default:
-    return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
-                                                       Opd2Info);
+    return TargetTransformInfo::getArithmeticInstrCost(
+        Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
   case ISD::ADD:
   case ISD::MUL:
   case ISD::XOR:

Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp Sun Aug 24 23:56:54 2014
@@ -126,10 +126,11 @@ public:
   unsigned getAddressComputationCost(Type *Val,
                                      bool IsComplex) const override;
 
-  unsigned
-  getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                         OperandValueKind Op1Info = OK_AnyValue,
-                         OperandValueKind Op2Info = OK_AnyValue) const override;
+  unsigned getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue,
+      OperandValueKind Op2Info = OK_AnyValue,
+      OperandValueProperties Opd1PropInfo = OP_None,
+      OperandValueProperties Opd2PropInfo = OP_None) const override;
 
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const override;
@@ -497,9 +498,10 @@ unsigned ARMTTI::getShuffleCost(ShuffleK
   return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 }
 
-unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                        OperandValueKind Op1Info,
-                                        OperandValueKind Op2Info) const {
+unsigned ARMTTI::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+    OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+    OperandValueProperties Opd2PropInfo) const {
 
   int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -555,8 +557,8 @@ unsigned ARMTTI::getArithmeticInstrCost(
   if (Idx != -1)
     return LT.first * CostTbl[Idx].Cost;
 
-  unsigned Cost =
-      TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+  unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(
+      Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
 
   // This is somewhat of a hack. The problem that we are facing is that SROA
   // creates a sequence of shift, and, or instructions to construct values.

Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Sun Aug 24 23:56:54 2014
@@ -92,9 +92,10 @@ public:
   virtual unsigned getNumberOfRegisters(bool Vector) const override;
   virtual unsigned getRegisterBitWidth(bool Vector) const override;
   virtual unsigned getMaximumUnrollFactor() const override;
-  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                          OperandValueKind,
-                                          OperandValueKind) const override;
+  virtual unsigned
+  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+                         OperandValueKind, OperandValueProperties,
+                         OperandValueProperties) const override;
   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                                   int Index, Type *SubTp) const override;
   virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -318,14 +319,15 @@ unsigned PPCTTI::getMaximumUnrollFactor(
   return 2;
 }
 
-unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                        OperandValueKind Op1Info,
-                                        OperandValueKind Op2Info) const {
+unsigned PPCTTI::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+    OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+    OperandValueProperties Opd2PropInfo) const {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
   // Fallback to the default implementation.
-  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
-                                                     Op2Info);
+  return TargetTransformInfo::getArithmeticInstrCost(
+      Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
 }
 
 unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Sun Aug 24 23:56:54 2014
@@ -84,7 +84,8 @@ public:
   unsigned getRegisterBitWidth(bool Vector) const override;
   unsigned getMaximumUnrollFactor() const override;
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
-                                  OperandValueKind) const override;
+                                  OperandValueKind, OperandValueProperties,
+                                  OperandValueProperties) const override;
   unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                           int Index, Type *SubTp) const override;
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -178,15 +179,37 @@ unsigned X86TTI::getMaximumUnrollFactor(
   return 2;
 }
 
-unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                        OperandValueKind Op1Info,
-                                        OperandValueKind Op2Info) const {
+unsigned X86TTI::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+    OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+    OperandValueProperties Opd2PropInfo) const {
   // Legalize the type.
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  if (ISD == ISD::SDIV &&
+      Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+    // On X86, vector signed division by constants power-of-two are
+    // normally expanded to the sequence SRA + SRL + ADD + SRA.
+    // The OperandValue properties many not be same as that of previous
+    // operation;conservatively assume OP_None.
+    unsigned Cost =
+        2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
+                                   TargetTransformInfo::OP_None,
+                                   TargetTransformInfo::OP_None);
+    Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+                                   TargetTransformInfo::OP_None,
+                                   TargetTransformInfo::OP_None);
+    Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
+                                   TargetTransformInfo::OP_None,
+                                   TargetTransformInfo::OP_None);
+
+    return Cost;
+  }
+
   static const CostTblEntry<MVT::SimpleValueType>
   AVX2UniformConstCostTable[] = {
     { ISD::SDIV, MVT::v16i16,  6 }, // vpmulhw sequence

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Sun Aug 24 23:56:54 2014
@@ -5837,18 +5837,31 @@ LoopVectorizationCostModel::getInstructi
       TargetTransformInfo::OK_AnyValue;
     TargetTransformInfo::OperandValueKind Op2VK =
       TargetTransformInfo::OK_AnyValue;
+    TargetTransformInfo::OperandValueProperties Op1VP =
+        TargetTransformInfo::OP_None;
+    TargetTransformInfo::OperandValueProperties Op2VP =
+        TargetTransformInfo::OP_None;
     Value *Op2 = I->getOperand(1);
 
     // Check for a splat of a constant or for a non uniform vector of constants.
-    if (isa<ConstantInt>(Op2))
+    if (isa<ConstantInt>(Op2)) {
+      ConstantInt *CInt = cast<ConstantInt>(Op2);
+      if (CInt && CInt->getValue().isPowerOf2())
+        Op2VP = TargetTransformInfo::OP_PowerOf2;
       Op2VK = TargetTransformInfo::OK_UniformConstantValue;
-    else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
+    } else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
       Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
-      if (cast<Constant>(Op2)->getSplatValue() != nullptr)
+      Constant *SplatValue = cast<Constant>(Op2)->getSplatValue();
+      if (SplatValue) {
+        ConstantInt *CInt = dyn_cast<ConstantInt>(SplatValue);
+        if (CInt && CInt->getValue().isPowerOf2())
+          Op2VP = TargetTransformInfo::OP_PowerOf2;
         Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+      }
     }
 
-    return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
+    return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK,
+                                      Op1VP, Op2VP);
   }
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=216371&r1=216370&r2=216371&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sun Aug 24 23:56:54 2014
@@ -1425,6 +1425,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
             TargetTransformInfo::OK_AnyValue;
         TargetTransformInfo::OperandValueKind Op2VK =
             TargetTransformInfo::OK_UniformConstantValue;
+        TargetTransformInfo::OperandValueProperties Op1VP =
+            TargetTransformInfo::OP_None;
+        TargetTransformInfo::OperandValueProperties Op2VP =
+            TargetTransformInfo::OP_None;
 
         // If all operands are exactly the same ConstantInt then set the
         // operand kind to OK_UniformConstantValue.
@@ -1446,11 +1450,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
               CInt != cast<ConstantInt>(I->getOperand(1)))
             Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
         }
+        // FIXME: Currently cost of model modification for division by
+        // power of 2 is handled only for X86. Add support for other targets.
+        if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && CInt &&
+            CInt->getValue().isPowerOf2())
+          Op2VP = TargetTransformInfo::OP_PowerOf2;
 
-        ScalarCost =
-            VecTy->getNumElements() *
-            TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK);
-        VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK);
+        ScalarCost = VecTy->getNumElements() *
+                     TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK,
+                                                 Op1VP, Op2VP);
+        VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK,
+                                              Op1VP, Op2VP);
       }
       return VecCost - ScalarCost;
     }

Added: llvm/trunk/test/Transforms/LoopVectorize/X86/powof2div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/powof2div.ll?rev=216371&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/powof2div.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/powof2div.ll Sun Aug 24 23:56:54 2014
@@ -0,0 +1,32 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }
+
+ at Foo = common global %struct.anon zeroinitializer, align 4
+
+;CHECK-LABEL: @foo(
+;CHECK: load <4 x i32>*
+;CHECK: sdiv <4 x i32>
+;CHECK: store <4 x i32>
+
+define void @foo(){
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %div = sdiv i32 %0, 2
+  %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
+  store i32 %div, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/powof2div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/powof2div.ll?rev=216371&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/powof2div.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/powof2div.ll Sun Aug 24 23:56:54 2014
@@ -0,0 +1,43 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK-LABEL: @powof2div(
+;CHECK: load <4 x i32>*
+;CHECK: add <4 x i32>
+;CHECK: sdiv <4 x i32>
+define void @powof2div(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+entry:
+  %0 = load i32* %b, align 4
+  %1 = load i32* %c, align 4
+  %add = add nsw i32 %1, %0
+  %div = sdiv i32 %add, 2
+  store i32 %div, i32* %a, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 1
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %c, i64 1
+  %3 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %3, %2
+  %div6 = sdiv i32 %add5, 2
+  %arrayidx7 = getelementptr inbounds i32* %a, i64 1
+  store i32 %div6, i32* %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32* %b, i64 2
+  %4 = load i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i64 2
+  %5 = load i32* %arrayidx9, align 4
+  %add10 = add nsw i32 %5, %4
+  %div11 = sdiv i32 %add10, 2
+  %arrayidx12 = getelementptr inbounds i32* %a, i64 2
+  store i32 %div11, i32* %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32* %b, i64 3
+  %6 = load i32* %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32* %c, i64 3
+  %7 = load i32* %arrayidx14, align 4
+  %add15 = add nsw i32 %7, %6
+  %div16 = sdiv i32 %add15, 2
+  %arrayidx17 = getelementptr inbounds i32* %a, i64 3
+  store i32 %div16, i32* %arrayidx17, align 4
+  ret void
+}
+