[llvm] r210342 - [SLP] Enable vectorization of GEP expressions.

Michael Zolotukhin mzolotukhin at apple.com
Fri Jun 6 08:34:25 PDT 2014


Author: mzolotukhin
Date: Fri Jun  6 10:34:24 2014
New Revision: 210342

URL: http://llvm.org/viewvc/llvm-project?rev=210342&view=rev
Log:
[SLP] Enable vectorization of GEP expressions.

The use cases look like the following:
    x->a = y->a + 10
    x->b = y->b + 12
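
For illustration only (not part of the committed log or patch): a rough IR
sketch of the pattern this enables, assuming 2-wide vectorization and using
hypothetical value names (%p0, %p1, %pv). Two scalar GEPs with the same
pointee type and constant indices can now become a single GEP over a vector
of pointers:

    ; hypothetical scalar input: one GEP per lane, constant index
    %a0 = getelementptr inbounds i32* %p0, i64 10
    %a1 = getelementptr inbounds i32* %p1, i64 12

    ; rough shape after SLP, where %pv is a <2 x i32*> built from %p0/%p1
    ; (e.g. by a vectorized load); exact names and flags will differ
    %av = getelementptr <2 x i32*> %pv, <2 x i64> <i64 10, i64 12>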

Added:
    llvm/trunk/test/Transforms/SLPVectorizer/X86/gep.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=210342&r1=210341&r2=210342&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Jun  6 10:34:24 2014
@@ -941,6 +941,51 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
       }
       return;
     }
+    case Instruction::GetElementPtr: {
+      // We don't combine GEPs with complicated (nested) indexing.
+      for (unsigned j = 0; j < VL.size(); ++j) {
+        if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
+          DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
+          newTreeEntry(VL, false);
+          return;
+        }
+      }
+
+      // We can't combine several GEPs into one vector if they operate on
+      // different types.
+      Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
+      for (unsigned j = 0; j < VL.size(); ++j) {
+        Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
+        if (Ty0 != CurTy) {
+          DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
+          newTreeEntry(VL, false);
+          return;
+        }
+      }
+
+      // We don't combine GEPs with non-constant indexes.
+      for (unsigned j = 0; j < VL.size(); ++j) {
+        auto Op = cast<Instruction>(VL[j])->getOperand(1);
+        if (!isa<ConstantInt>(Op)) {
+          DEBUG(
+              dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
+          newTreeEntry(VL, false);
+          return;
+        }
+      }
+
+      newTreeEntry(VL, true);
+      DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
+      for (unsigned i = 0, e = 2; i < e; ++i) {
+        ValueList Operands;
+        // Prepare the operand vector.
+        for (unsigned j = 0; j < VL.size(); ++j)
+          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+        buildTree_rec(Operands, Depth + 1);
+      }
+      return;
+    }
     case Instruction::Store: {
       // Check if the stores are consecutive or if we need to swizzle them.
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
@@ -1146,6 +1191,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
       }
       return VecCost - ScalarCost;
     }
+    case Instruction::GetElementPtr: {
+      TargetTransformInfo::OperandValueKind Op1VK =
+          TargetTransformInfo::OK_AnyValue;
+      TargetTransformInfo::OperandValueKind Op2VK =
+          TargetTransformInfo::OK_UniformConstantValue;
+
+      int ScalarCost =
+          VecTy->getNumElements() *
+          TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
+      int VecCost =
+          TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
+
+      return VecCost - ScalarCost;
+    }
     case Instruction::Load: {
       // Cost of wide load - cost of scalar loads.
       int ScalarLdCost = VecTy->getNumElements() *
@@ -1674,6 +1733,34 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
       E->VectorizedValue = S;
       return propagateMetadata(S, E->Scalars);
     }
+    case Instruction::GetElementPtr: {
+      setInsertPointAfterBundle(E->Scalars);
+
+      ValueList Op0VL;
+      for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+        Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
+
+      Value *Op0 = vectorizeTree(Op0VL);
+
+      std::vector<Value *> OpVecs;
+      for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
+           ++j) {
+        ValueList OpVL;
+        for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+          OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
+
+        Value *OpVec = vectorizeTree(OpVL);
+        OpVecs.push_back(OpVec);
+      }
+
+      Value *V = Builder.CreateGEP(Op0, OpVecs);
+      E->VectorizedValue = V;
+
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        return propagateMetadata(I, E->Scalars);
+
+      return V;
+    }
     case Instruction::Call: {
       CallInst *CI = cast<CallInst>(VL0);
       setInsertPointAfterBundle(E->Scalars);
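
As context for the buildTree_rec changes above (illustration only, not part
of the patch): a GEP bundle is vectorized only if every GEP has exactly two
operands, all pointer operands share one type, and every index is a
ConstantInt; anything else is gathered. A hypothetical bundle the first
check would reject:

    ; three-operand ("nested index") GEPs over hypothetical pointers %a, %b;
    ; rejected with "SLP: not-vectorizable GEP (nested indexes)."
    %g0 = getelementptr inbounds [4 x i32]* %a, i64 0, i64 1
    %g1 = getelementptr inbounds [4 x i32]* %b, i64 0, i64 1

The cost model treats each GEP address computation as a single integer add,
so the reported benefit is just the scalar-versus-vector Add cost difference
for the bundle width.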

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/gep.ll?rev=210342&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/gep.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/gep.ll Fri Jun  6 10:34:24 2014
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Test if SLP can handle GEP expressions.
+; The test performs the following actions:
+;   x->first  = y->first  + 16
+;   x->second = y->second + 16
+
+; CHECK-LABEL: foo1
+; CHECK: <2 x i32*>
+define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
+  %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
+  %2 = load i32** %1, align 8
+  %3 = getelementptr inbounds i32* %2, i64 16
+  %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
+  store i32* %3, i32** %4, align 8
+  %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
+  %6 = load i32** %5, align 8
+  %7 = getelementptr inbounds i32* %6, i64 16
+  %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
+  store i32* %7, i32** %8, align 8
+  ret void
+}
+
+; Test that we don't vectorize GEP expressions if the indexes are not constants.
+; We can't produce efficient code in that case.
+; CHECK-LABEL: foo2
+; CHECK-NOT: <2 x i32*>
+define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
+  %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
+  %2 = load i32** %1, align 8
+  %3 = getelementptr inbounds i32* %2, i32 %i
+  %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
+  store i32* %3, i32** %4, align 8
+  %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
+  %6 = load i32** %5, align 8
+  %7 = getelementptr inbounds i32* %6, i32 %i
+  %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
+  store i32* %7, i32** %8, align 8
+  ret void
+}
