[llvm] r279370 - Reapply "[SLP] Initialize VectorizedValue when gathering"

Matthew Simpson via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 20 07:49:03 PDT 2016


Author: mssimpso
Date: Sat Aug 20 09:49:02 2016
New Revision: 279370

URL: http://llvm.org/viewvc/llvm-project?rev=279370&view=rev
Log:
Reapply "[SLP] Initialize VectorizedValue when gathering"

The test case included in r279125 exposed existing undefined behavior in the
SLP vectorizer that it did not introduce. This patch reapplies the original
patch, but modifies the test case to avoid hitting the undefined behavior. This
allows us to close PR28330 while keeping the UBSan bot happy. The undefined
behavior the original test uncovered will be addressed in a follow-on patch.

Reference: https://llvm.org/bugs/show_bug.cgi?id=28330

Added:
    llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=279370&r1=279369&r2=279370&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sat Aug 20 09:49:02 2016
@@ -2157,11 +2157,61 @@ void BoUpSLP::reorderInputsAccordingToOp
 }
 
 void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
-  Instruction *VL0 = cast<Instruction>(VL[0]);
-  BasicBlock::iterator NextInst(VL0);
-  ++NextInst;
-  Builder.SetInsertPoint(VL0->getParent(), NextInst);
-  Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+
+  // Get the basic block this bundle is in. All instructions in the bundle
+  // should be in this block.
+  auto *Front = cast<Instruction>(VL.front());
+  auto *BB = Front->getParent();
+  assert(all_of(make_range(VL.begin(), VL.end()), [&](Value *V) -> bool {
+    return cast<Instruction>(V)->getParent() == BB;
+  }));
+
+  // The last instruction in the bundle in program order.
+  Instruction *LastInst = nullptr;
+
+  // Find the last instruction. The common case should be that BB has been
+  // scheduled, and the last instruction is VL.back(). So we start with
+  // VL.back() and iterate over schedule data until we reach the end of the
+  // bundle. The end of the bundle is marked by null ScheduleData.
+  if (BlocksSchedules.count(BB)) {
+    auto *Bundle = BlocksSchedules[BB]->getScheduleData(VL.back());
+    if (Bundle && Bundle->isPartOfBundle())
+      for (; Bundle; Bundle = Bundle->NextInBundle)
+        LastInst = Bundle->Inst;
+  }
+
+  // LastInst can still be null at this point if there's either not an entry
+  // for BB in BlocksSchedules or there's no ScheduleData available for
+  // VL.back(). This can be the case if buildTree_rec aborts for various
+  // reasons (e.g., the maximum recursion depth is reached, the maximum region
+  // size is reached, etc.). ScheduleData is initialized in the scheduling
+  // "dry-run".
+  //
+  // If this happens, we can still find the last instruction by brute force. We
+  // iterate forwards from Front (inclusive) until we either see all
+  // instructions in the bundle or reach the end of the block. If Front is the
+  // last instruction in program order, LastInst will be set to Front, and we
+  // will visit all the remaining instructions in the block.
+  //
+  // One of the reasons we exit early from buildTree_rec is to place an upper
+  // bound on compile-time. Thus, taking an additional compile-time hit here is
+  // not ideal. However, this should be exceedingly rare since it requires that
+  // we both exit early from buildTree_rec and that the bundle be out-of-order
+  // (causing us to iterate all the way to the end of the block).
+  if (!LastInst) {
+    SmallPtrSet<Value *, 16> Bundle(VL.begin(), VL.end());
+    for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
+      if (Bundle.erase(&I))
+        LastInst = &I;
+      if (Bundle.empty())
+        break;
+    }
+  }
+
+  // Set the insertion point after the last instruction in the bundle. Set the
+  // debug location to Front.
+  Builder.SetInsertPoint(BB, next(BasicBlock::iterator(LastInst)));
+  Builder.SetCurrentDebugLocation(Front->getDebugLoc());
 }
 
 Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
@@ -2239,7 +2289,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
 
   if (E->NeedToGather) {
     setInsertPointAfterBundle(E->Scalars);
-    return Gather(E->Scalars, VecTy);
+    auto *V = Gather(E->Scalars, VecTy);
+    E->VectorizedValue = V;
+    return V;
   }
 
   unsigned Opcode = getSameOpcode(E->Scalars);
@@ -2286,7 +2338,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
         E->VectorizedValue = V;
         return V;
       }
-      return Gather(E->Scalars, VecTy);
+      setInsertPointAfterBundle(E->Scalars);
+      auto *V = Gather(E->Scalars, VecTy);
+      E->VectorizedValue = V;
+      return V;
     }
     case Instruction::ExtractValue: {
       if (canReuseExtract(E->Scalars, Instruction::ExtractValue)) {
@@ -2298,7 +2353,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
         E->VectorizedValue = V;
         return propagateMetadata(V, E->Scalars);
       }
-      return Gather(E->Scalars, VecTy);
+      setInsertPointAfterBundle(E->Scalars);
+      auto *V = Gather(E->Scalars, VecTy);
+      E->VectorizedValue = V;
+      return V;
     }
     case Instruction::ZExt:
     case Instruction::SExt:

Added: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll?rev=279370&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll Sat Aug 20 09:49:02 2016
@@ -0,0 +1,87 @@
+; RUN: opt < %s -slp-vectorizer -S | FileCheck %s --check-prefix=DEFAULT
+; RUN: opt < %s -slp-schedule-budget=0 -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+ at a = common global [80 x i8] zeroinitializer, align 16
+
+; DEFAULT-LABEL: @PR28330(
+; DEFAULT: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
+; DEFAULT: %[[S0:.+]] = select <8 x i1> %1, <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; DEFAULT: %[[R0:.+]] = shufflevector <8 x i32> %[[S0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; DEFAULT: %[[R1:.+]] = add <8 x i32> %[[S0]], %[[R0]]
+; DEFAULT: %[[R2:.+]] = shufflevector <8 x i32> %[[R1]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; DEFAULT: %[[R3:.+]] = add <8 x i32> %[[R1]], %[[R2]]
+; DEFAULT: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; DEFAULT: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
+; DEFAULT: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
+; DEFAULT: %tmp34 = add i32 %[[R6]], %tmp17
+;
+; GATHER-LABEL: @PR28330(
+; GATHER: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
+; GATHER: %tmp19 = select i1 %tmp1, i32 -720, i32 -80
+; GATHER: %tmp21 = select i1 %tmp3, i32 -720, i32 -80
+; GATHER: %tmp23 = select i1 %tmp5, i32 -720, i32 -80
+; GATHER: %tmp25 = select i1 %tmp7, i32 -720, i32 -80
+; GATHER: %tmp27 = select i1 %tmp9, i32 -720, i32 -80
+; GATHER: %tmp29 = select i1 %tmp11, i32 -720, i32 -80
+; GATHER: %tmp31 = select i1 %tmp13, i32 -720, i32 -80
+; GATHER: %tmp33 = select i1 %tmp15, i32 -720, i32 -80
+; GATHER: %[[I0:.+]] = insertelement <8 x i32> undef, i32 %tmp19, i32 0
+; GATHER: %[[I1:.+]] = insertelement <8 x i32> %[[I0]], i32 %tmp21, i32 1
+; GATHER: %[[I2:.+]] = insertelement <8 x i32> %[[I1]], i32 %tmp23, i32 2
+; GATHER: %[[I3:.+]] = insertelement <8 x i32> %[[I2]], i32 %tmp25, i32 3
+; GATHER: %[[I4:.+]] = insertelement <8 x i32> %[[I3]], i32 %tmp27, i32 4
+; GATHER: %[[I5:.+]] = insertelement <8 x i32> %[[I4]], i32 %tmp29, i32 5
+; GATHER: %[[I6:.+]] = insertelement <8 x i32> %[[I5]], i32 %tmp31, i32 6
+; GATHER: %[[I7:.+]] = insertelement <8 x i32> %[[I6]], i32 %tmp33, i32 7
+; GATHER: %[[R0:.+]] = shufflevector <8 x i32> %[[I7]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; GATHER: %[[R1:.+]] = add <8 x i32> %[[I7]], %[[R0]]
+; GATHER: %[[R2:.+]] = shufflevector <8 x i32> %[[R1]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GATHER: %[[R3:.+]] = add <8 x i32> %[[R1]], %[[R2]]
+; GATHER: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GATHER: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
+; GATHER: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
+; GATHER: %tmp34 = add i32 %[[R6]], %tmp17
+
+define void @PR28330(i32 %n) {
+entry:
+  %tmp0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
+  %tmp1 = icmp eq i8 %tmp0, 0
+  %tmp2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
+  %tmp3 = icmp eq i8 %tmp2, 0
+  %tmp4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
+  %tmp5 = icmp eq i8 %tmp4, 0
+  %tmp6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
+  %tmp7 = icmp eq i8 %tmp6, 0
+  %tmp8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
+  %tmp9 = icmp eq i8 %tmp8, 0
+  %tmp10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
+  %tmp11 = icmp eq i8 %tmp10, 0
+  %tmp12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
+  %tmp13 = icmp eq i8 %tmp12, 0
+  %tmp14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
+  %tmp15 = icmp eq i8 %tmp14, 0
+  br label %for.body
+
+for.body:
+  %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
+  %tmp19 = select i1 %tmp1, i32 -720, i32 -80
+  %tmp20 = add i32 %tmp17, %tmp19
+  %tmp21 = select i1 %tmp3, i32 -720, i32 -80
+  %tmp22 = add i32 %tmp20, %tmp21
+  %tmp23 = select i1 %tmp5, i32 -720, i32 -80
+  %tmp24 = add i32 %tmp22, %tmp23
+  %tmp25 = select i1 %tmp7, i32 -720, i32 -80
+  %tmp26 = add i32 %tmp24, %tmp25
+  %tmp27 = select i1 %tmp9, i32 -720, i32 -80
+  %tmp28 = add i32 %tmp26, %tmp27
+  %tmp29 = select i1 %tmp11, i32 -720, i32 -80
+  %tmp30 = add i32 %tmp28, %tmp29
+  %tmp31 = select i1 %tmp13, i32 -720, i32 -80
+  %tmp32 = add i32 %tmp30, %tmp31
+  %tmp33 = select i1 %tmp15, i32 -720, i32 -80
+  %tmp34 = add i32 %tmp32, %tmp33
+  br label %for.body
+}




More information about the llvm-commits mailing list