[llvm] r276056 - [LSV] Insert stores at the right point.

Justin Lebar via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 19 16:19:21 PDT 2016


Author: jlebar
Date: Tue Jul 19 18:19:20 2016
New Revision: 276056

URL: http://llvm.org/viewvc/llvm-project?rev=276056&view=rev
Log:
[LSV] Insert stores at the right point.

Summary:
Previously, the insertion point for stores was the last instruction in
Chain *before calling getVectorizablePrefixEndIdx*.  Thus if
getVectorizablePrefixEndIdx didn't return Chain.size(), we still would
insert at the last instruction in Chain.

This patch changes our internal API a bit in an attempt to make it less
prone to this sort of error.  As a result, we end up recalculating the
Chain's boundary instructions, but I think worrying about the speed hit
of this is a premature optimization right now.

Reviewers: asbirlea, tstellarAMD

Subscribers: mzolotukhin, arsenm, llvm-commits

Differential Revision: https://reviews.llvm.org/D22534

Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
    llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp?rev=276056&r1=276055&r2=276056&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Tue Jul 19 18:19:20 2016
@@ -104,13 +104,12 @@ private:
   std::pair<ArrayRef<Value *>, ArrayRef<Value *>>
   splitOddVectorElts(ArrayRef<Value *> Chain, unsigned ElementSizeBits);
 
-  /// Checks for instructions which may affect the memory accessed
-  /// in the chain between \p From and \p To. Returns Index, where
-  /// \p Chain[0, Index) is the largest vectorizable chain prefix.
-  /// The elements of \p Chain should be all loads or all stores.
-  unsigned getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
-                                       BasicBlock::iterator From,
-                                       BasicBlock::iterator To);
+  /// Finds the largest prefix of Chain that's vectorizable, checking for
+  /// intervening instructions which may affect the memory accessed by the
+  /// instructions within Chain.
+  ///
+  /// The elements of \p Chain must be all loads or all stores.
+  ArrayRef<Value *> getVectorizablePrefix(ArrayRef<Value *> Chain);
 
   /// Collects load and store instructions to vectorize.
   std::pair<ValueListMap, ValueListMap> collectInstructions(BasicBlock *BB);
@@ -424,14 +423,12 @@ Vectorizer::splitOddVectorElts(ArrayRef<
   return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
 }
 
-unsigned Vectorizer::getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
-                                                 BasicBlock::iterator From,
-                                                 BasicBlock::iterator To) {
+ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
   SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
   SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
 
   unsigned InstrIdx = 0;
-  for (Instruction &I : make_range(From, To)) {
+  for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
     ++InstrIdx;
     if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
       if (!is_contained(Chain, &I))
@@ -445,7 +442,7 @@ unsigned Vectorizer::getVectorizablePref
   }
 
   assert(Chain.size() == ChainInstrs.size() &&
-         "All instructions in the Chain must exist in [From, To).");
+         "All instrs in Chain must be within range getBoundaryInstrs(Chain).");
 
   unsigned ChainIdx = 0;
   for (auto EntryChain : ChainInstrs) {
@@ -487,12 +484,12 @@ unsigned Vectorizer::getVectorizablePref
                  << "  " << *Ptr1 << '\n';
         });
 
-        return ChainIdx;
+        return Chain.slice(0, ChainIdx);
       }
     }
     ChainIdx++;
   }
-  return Chain.size();
+  return Chain;
 }
 
 std::pair<ValueListMap, ValueListMap>
@@ -694,22 +691,20 @@ bool Vectorizer::vectorizeStoreChain(
     return false;
   }
 
-  BasicBlock::iterator First, Last;
-  std::tie(First, Last) = getBoundaryInstrs(Chain);
-  unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
-  if (StopChain == 0) {
+  ArrayRef<Value *> NewChain = getVectorizablePrefix(Chain);
+  if (NewChain.empty()) {
     // There exists a side effect instruction, no vectorization possible.
     InstructionsProcessed->insert(Chain.begin(), Chain.end());
     return false;
   }
-  if (StopChain == 1) {
+  if (NewChain.size() == 1) {
     // Failed after the first instruction. Discard it and try the smaller chain.
-    InstructionsProcessed->insert(Chain.front());
+    InstructionsProcessed->insert(NewChain.front());
     return false;
   }
 
   // Update Chain to the valid vectorizable subchain.
-  Chain = Chain.slice(0, StopChain);
+  Chain = NewChain;
   ChainSize = Chain.size();
 
   // Store size should be 1B, 2B or multiple of 4B.
@@ -773,7 +768,8 @@ bool Vectorizer::vectorizeStoreChain(
     }
   }
 
-  // Set insert point.
+  BasicBlock::iterator First, Last;
+  std::tie(First, Last) = getBoundaryInstrs(Chain);
   Builder.SetInsertPoint(&*Last);
 
   Value *Vec = UndefValue::get(VecTy);
@@ -849,22 +845,20 @@ bool Vectorizer::vectorizeLoadChain(
     return false;
   }
 
-  BasicBlock::iterator First, Last;
-  std::tie(First, Last) = getBoundaryInstrs(Chain);
-  unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
-  if (StopChain == 0) {
+  ArrayRef<Value *> NewChain = getVectorizablePrefix(Chain);
+  if (NewChain.empty()) {
     // There exists a side effect instruction, no vectorization possible.
     InstructionsProcessed->insert(Chain.begin(), Chain.end());
     return false;
   }
-  if (StopChain == 1) {
+  if (NewChain.size() == 1) {
     // Failed after the first instruction. Discard it and try the smaller chain.
-    InstructionsProcessed->insert(Chain.front());
+    InstructionsProcessed->insert(NewChain.front());
     return false;
   }
 
   // Update Chain to the valid vectorizable subchain.
-  Chain = Chain.slice(0, StopChain);
+  Chain = NewChain;
   ChainSize = Chain.size();
 
   // Load size should be 1B, 2B or multiple of 4B.
@@ -927,7 +921,11 @@ bool Vectorizer::vectorizeLoadChain(
       V->dump();
   });
 
-  // Set insert point.
+  // getVectorizablePrefix already computed getBoundaryInstrs.  The value of
+  // Last may have changed since then, but the value of First won't have.  If it
+  // matters, we could compute getBoundaryInstrs only once and reuse it here.
+  BasicBlock::iterator First, Last;
+  std::tie(First, Last) = getBoundaryInstrs(Chain);
   Builder.SetInsertPoint(&*First);
 
   Value *Bitcast =

Modified: llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll?rev=276056&r1=276055&r2=276056&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll (original)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll Tue Jul 19 18:19:20 2016
@@ -2,8 +2,9 @@
 
 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
 
-; Check relative position of the inserted vector load relative to the existing
-; adds. Vectorized loads should be inserted at the position of the first load.
+; Check position of the inserted vector load/store.  Vectorized loads should be
+; inserted at the position of the first load in the chain, and stores should be
+; inserted at the position of the last store.
 
 ; CHECK-LABEL: @insert_load_point(
 ; CHECK: %z = add i32 %x, 4
@@ -59,4 +60,30 @@ entry:
   ret void
 }
 
+; Here we have four stores, with an aliasing load before the last one.  We can
+; vectorize the first two stores as <2 x float>, but this vectorized store must
+; be inserted at the location of the second scalar store, not the fourth one.
+;
+; CHECK-LABEL: @insert_store_point_alias
+; CHECK: store <2 x float>
+; CHECK: store float
+; CHECK-SAME: %a.idx.2
+; CHECK: load float, float addrspace(1)* %a.idx.2
+; CHECK: store float
+; CHECK-SAME: %a.idx.3
+define float @insert_store_point_alias(float addrspace(1)* nocapture %a, i64 %idx) {
+  %a.idx = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
+  %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a.idx, i64 1
+  %a.idx.2 = getelementptr inbounds float, float addrspace(1)* %a.idx.1, i64 1
+  %a.idx.3 = getelementptr inbounds float, float addrspace(1)* %a.idx.2, i64 1
+
+  store float 0.0, float addrspace(1)* %a.idx, align 4
+  store float 0.0, float addrspace(1)* %a.idx.1, align 4
+  store float 0.0, float addrspace(1)* %a.idx.2, align 4
+  %x = load float, float addrspace(1)* %a.idx.2, align 4
+  store float 0.0, float addrspace(1)* %a.idx.3, align 4
+
+  ret float %x
+}
+
 attributes #0 = { nounwind }




More information about the llvm-commits mailing list