[PATCH] Extend SLPVectorizer for cases where insertelement instructions must be rescheduled

Arch D. Robison arch.robison at intel.com
Tue Mar 25 13:11:24 PDT 2014


  Removed a gratuitous addition of a blank line.

Hi #llvm,

http://llvm-reviews.chandlerc.com/D3143

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D3143?vs=8105&id=8109#toc

Files:
  lib/Transforms/Vectorize/SLPVectorizer.cpp
  test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -366,8 +366,9 @@
   int getTreeCost();
 
   /// Construct a vectorizable tree that starts at \p Roots and is possibly
-  /// used by a reduction of \p RdxOps.
-  void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
+  /// used by a reduction of \p RdxOps.  Flag \p RdxFreeExtract should be true
+  /// if the reduction values do not need to be extracted.
+  void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0, bool RdxFreeExtract=false);
 
   /// Clear the internal data structures that are created by 'buildTree'.
   void deleteTree() {
@@ -384,6 +385,10 @@
 
   /// \brief Perform LICM and CSE on the newly generated gather sequences.
   void optimizeGatherSequence();
+
+  /// \brief Move InsertElement instructions that precede the last index of
+  /// \p VL to after it. \p IE is the root of a chain from findBuildVector.
+  void movePrematureInserts(ArrayRef<Value *> VL, InsertElementInst *IE);
 private:
   struct TreeEntry;
 
@@ -542,7 +547,8 @@
   IRBuilder<> Builder;
 };
 
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx, bool RdxFreeExtract) {
+  assert(!RdxFreeExtract||Rdx);
   deleteTree();
   RdxOps = Rdx;
   if (!getSameType(Roots))
@@ -576,8 +582,8 @@
         if (!UserInst)
           continue;
 
-        // Ignore uses that are part of the reduction.
-        if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
+        // Ignore uses that are part of the reduction that will not need extracts.
+        if (RdxFreeExtract && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
           continue;
 
         DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
@@ -1840,6 +1846,29 @@
   GatherSeq.clear();
 }
 
+void BoUpSLP::movePrematureInserts(ArrayRef<Value *> VL, InsertElementInst *IE) { // Reschedule inserts of the chain starting at IE whose index is below getLastIndex(VL).
+  Instruction *VL0 = cast<Instruction>(VL[0]); // first value of the bundle; cast<> requires it to be an Instruction
+  int MyLastIndex = getLastIndex(VL); // index bound used below; presumably the latest position of VL in its block - confirm in getLastIndex
+  BasicBlock *BB = cast<Instruction>(VL0)->getParent(); // block that owns VL[0] (the cast is redundant: VL0 is already Instruction*)
+  BlockNumbering &BN = BlocksNumbers[BB]; // numbering for BB, used to map between instructions and indices
+  DEBUG(dbgs() << "SLP: Moving premature inserts\n");
+  Instruction* x = BN.getInstruction(MyLastIndex); // insertion point: each moved insert is placed right after x
+  while (IE->getParent()==BB) { // only inserts located in VL[0]'s block are considered
+    int UserIndex = BN.getIndex(IE); // NOTE(review): BN is queried after earlier moves in this loop - confirm the numbering stays valid
+    if (UserIndex >= MyLastIndex) {
+      // IE already sits at or after MyLastIndex: stop walking the chain.
+      break;
+    }
+    IE->removeFromParent(); // unlink IE from the block ...
+    IE->insertAfter(x); // ... and re-insert it directly after x
+    DEBUG(dbgs() << "SLP:    Rescheduled: " << *IE << ".\n");
+    x = IE; // keep chain order: the next moved insert goes after this one
+    IE = dyn_cast<InsertElementInst>(IE->user_back()); // advance via user_back(); NOTE(review): presumes IE has a user here - confirm
+    if (!IE) // the user is not an insertelement: end of the chain
+      break;
+  } 
+}
+
 /// The SLPVectorizer Pass.
 struct SLPVectorizer : public FunctionPass {
   typedef SmallVector<StoreInst *, 8> StoreList;
@@ -1943,7 +1972,7 @@
 
   /// \brief Try to vectorize a list of operands.
   /// \returns true if a value was vectorized.
-  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, InsertElementInst *IE=0, BoUpSLP::ValueSet *Inserts=0);
 
   /// \brief Try to vectorize a chain that may start at the operands of \V;
   bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
@@ -2116,7 +2145,7 @@
   return tryToVectorizeList(VL, R);
 }
 
-bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, InsertElementInst *IE, BoUpSLP::ValueSet* Inserts) {
   if (VL.size() < 2)
     return false;
 
@@ -2166,10 +2195,14 @@
                  << "\n");
     ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
 
-    R.buildTree(Ops);
+    R.buildTree(Ops, Inserts);
     int Cost = R.getTreeCost();
 
     if (Cost < -SLPCostThreshold) {
+      if (Inserts) {
+        R.movePrematureInserts(VL, IE);  
+        Inserts = 0;
+      }
       DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n");
       R.vectorizeTree();
 
@@ -2412,7 +2445,7 @@
 
     for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
       ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
-      V.buildTree(ValsToReduce, &ReductionOps);
+      V.buildTree(ValsToReduce, &ReductionOps, true);
 
       // Estimate cost.
       int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
@@ -2529,10 +2562,12 @@
 ///  %rc = insertelement <4 x float> %rb, float %s2, i32 2
 ///  %rd = insertelement <4 x float> %rc, float %s3, i32 3
 ///
-/// Returns true if it matches
+/// Returns true if it matches.  Sets \p Ops to the values inserted
+/// and \p Inserts to the insertelement instructions.
 ///
 static bool findBuildVector(InsertElementInst *IE,
-                            SmallVectorImpl<Value *> &Ops) {
+                            SmallVectorImpl<Value *> &Ops,
+                            BoUpSLP::ValueSet &Inserts) {
   if (!isa<UndefValue>(IE->getOperand(0)))
     return false;
 
@@ -2551,6 +2586,7 @@
     if (!IE->hasOneUse())
       return false;
 
+    Inserts.insert(IE);
     IE = NextUse;
   }
 
@@ -2709,10 +2745,11 @@
     // Try to vectorize trees that start at insertelement instructions.
     if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
       SmallVector<Value *, 8> Ops;
-      if (!findBuildVector(IE, Ops))
+      BoUpSLP::ValueSet Inserts;
+      if (!findBuildVector(IE, Ops, Inserts))
         continue;
 
-      if (tryToVectorizeList(Ops, R)) {
+      if (tryToVectorizeList(Ops, R, IE, &Inserts)) {
         Changed = true;
         it = BB->begin();
         e = BB->end();
Index: test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -194,4 +194,28 @@
   ret <4 x float> %rb
 }
 
+; Make sure that vectorization happens even if insertelement operations
+; must be rescheduled.  The case here is from compiling Julia.
+define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @reschedule_extract(
+; CHECK: %1 = fadd <4 x float> %a, %b
+  %a0 = extractelement <4 x float> %a, i32 0
+  %b0 = extractelement <4 x float> %b, i32 0
+  %c0 = fadd float %a0, %b0
+  %v0 = insertelement <4 x float> undef, float %c0, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %b1 = extractelement <4 x float> %b, i32 1
+  %c1 = fadd float %a1, %b1
+  %v1 = insertelement <4 x float> %v0, float %c1, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %b2 = extractelement <4 x float> %b, i32 2
+  %c2 = fadd float %a2, %b2
+  %v2 = insertelement <4 x float> %v1, float %c2, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b3 = extractelement <4 x float> %b, i32 3
+  %c3 = fadd float %a3, %b3
+  %v3 = insertelement <4 x float> %v2, float %c3, i32 3
+  ret <4 x float> %v3
+}
+
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D3143.4.patch
Type: text/x-patch
Size: 7303 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140325/3fe22f28/attachment.bin>


More information about the llvm-commits mailing list