[llvm] r275317 - Extended LoadStoreVectorizer to vectorize subchains.

Alina Sbirlea via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 13 14:20:01 PDT 2016


Author: asbirlea
Date: Wed Jul 13 16:20:01 2016
New Revision: 275317

URL: http://llvm.org/viewvc/llvm-project?rev=275317&view=rev
Log:
Extended LoadStoreVectorizer to vectorize subchains.

Summary:
LSV used to abort vectorizing an entire chain when interleaved load/store accesses alias.
Now a valid prefix of the chain can be vectorized: only that prefix is marked as processed, and vectorization is then retried on the remainder of the chain.

Reviewers: llvm-commits, jlebar, arsenm

Subscribers: mzolotukhin

Differential Revision: http://reviews.llvm.org/D22119

Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
    llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp?rev=275317&r1=275316&r2=275317&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Wed Jul 13 16:20:01 2016
@@ -106,11 +106,13 @@ private:
   std::pair<ArrayRef<Value *>, ArrayRef<Value *>>
   splitOddVectorElts(ArrayRef<Value *> Chain, unsigned ElementSizeBits);
 
-  /// Checks if there are any instructions which may affect the memory accessed
-  /// in the chain between \p From and \p To. The elements of \p Chain should be
-  /// all loads or all stores.
-  bool isVectorizable(ArrayRef<Value *> Chain, BasicBlock::iterator From,
-                      BasicBlock::iterator To);
+  /// Checks for instructions which may affect the memory accessed
+  /// in the chain between \p From and \p To. Returns Index, where
+  /// \p Chain[0, Index) is the largest vectorizable chain prefix.
+  /// The elements of \p Chain should be all loads or all stores.
+  unsigned getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
+                                       BasicBlock::iterator From,
+                                       BasicBlock::iterator To);
 
   /// Collects load and store instructions to vectorize.
   void collectInstructions(BasicBlock *BB);
@@ -123,10 +125,12 @@ private:
   bool vectorizeInstructions(ArrayRef<Value *> Instrs);
 
   /// Vectorizes the load instructions in Chain.
-  bool vectorizeLoadChain(ArrayRef<Value *> Chain);
+  bool vectorizeLoadChain(ArrayRef<Value *> Chain,
+                          SmallPtrSet<Value *, 16> *InstructionsProcessed);
 
   /// Vectorizes the store instructions in Chain.
-  bool vectorizeStoreChain(ArrayRef<Value *> Chain);
+  bool vectorizeStoreChain(ArrayRef<Value *> Chain,
+                           SmallPtrSet<Value *, 16> *InstructionsProcessed);
 
   /// Check if this load/store access is misaligned accesses
   bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
@@ -421,50 +425,53 @@ Vectorizer::splitOddVectorElts(ArrayRef<
   return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
 }
 
-bool Vectorizer::isVectorizable(ArrayRef<Value *> Chain,
-                                BasicBlock::iterator From,
-                                BasicBlock::iterator To) {
+unsigned Vectorizer::getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
+                                                 BasicBlock::iterator From,
+                                                 BasicBlock::iterator To) {
   SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
   SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
 
-  unsigned Idx = 0;
-  for (auto I = From, E = To; I != E; ++I, ++Idx) {
+  unsigned InstrIdx = 0;
+  for (auto I = From; I != To; ++I, ++InstrIdx) {
     if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
       if (!is_contained(Chain, &*I))
-        MemoryInstrs.push_back({&*I, Idx});
+        MemoryInstrs.push_back({&*I, InstrIdx});
       else
-        ChainInstrs.push_back({&*I, Idx});
+        ChainInstrs.push_back({&*I, InstrIdx});
     } else if (I->mayHaveSideEffects()) {
       DEBUG(dbgs() << "LSV: Found side-effecting operation: " << *I << '\n');
-      return false;
+      return 0;
     }
   }
 
   assert(Chain.size() == ChainInstrs.size() &&
          "All instructions in the Chain must exist in [From, To).");
 
-  for (auto EntryMem : MemoryInstrs) {
-    Value *V = EntryMem.first;
-    unsigned VIdx = EntryMem.second;
-    for (auto EntryChain : ChainInstrs) {
-      Value *VV = EntryChain.first;
-      unsigned VVIdx = EntryChain.second;
-      if (isa<LoadInst>(V) && isa<LoadInst>(VV))
+  unsigned ChainIdx = 0;
+  for (auto EntryChain : ChainInstrs) {
+    Value *ChainInstrValue = EntryChain.first;
+    unsigned ChainInstrIdx = EntryChain.second;
+    for (auto EntryMem : MemoryInstrs) {
+      Value *MemInstrValue = EntryMem.first;
+      unsigned MemInstrIdx = EntryMem.second;
+      if (isa<LoadInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue))
         continue;
 
       // We can ignore the alias as long as the load comes before the store,
       // because that means we won't be moving the load past the store to
       // vectorize it (the vectorized load is inserted at the location of the
       // first load in the chain).
-      if (isa<StoreInst>(V) && isa<LoadInst>(VV) && VVIdx < VIdx)
+      if (isa<StoreInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue) &&
+          ChainInstrIdx < MemInstrIdx)
         continue;
 
       // Same case, but in reverse.
-      if (isa<LoadInst>(V) && isa<StoreInst>(VV) && VVIdx > VIdx)
+      if (isa<LoadInst>(MemInstrValue) && isa<StoreInst>(ChainInstrValue) &&
+          ChainInstrIdx > MemInstrIdx)
         continue;
 
-      Instruction *M0 = cast<Instruction>(V);
-      Instruction *M1 = cast<Instruction>(VV);
+      Instruction *M0 = cast<Instruction>(MemInstrValue);
+      Instruction *M1 = cast<Instruction>(ChainInstrValue);
 
       if (!AA.isNoAlias(MemoryLocation::get(M0), MemoryLocation::get(M1))) {
         DEBUG({
@@ -473,17 +480,17 @@ bool Vectorizer::isVectorizable(ArrayRef
 
           dbgs() << "LSV: Found alias.\n"
                     "        Aliasing instruction and pointer:\n"
-                 << *V << " aliases " << *Ptr0 << '\n'
+                 << *MemInstrValue << " aliases " << *Ptr0 << '\n'
                  << "        Aliased instruction and pointer:\n"
-                 << *VV << " aliases " << *Ptr1 << '\n';
+                 << *ChainInstrValue << " aliases " << *Ptr1 << '\n';
         });
 
-        return false;
+        return ChainIdx;
       }
     }
+    ChainIdx++;
   }
-
-  return true;
+  return Chain.size();
 }
 
 void Vectorizer::collectInstructions(BasicBlock *BB) {
@@ -614,10 +621,19 @@ bool Vectorizer::vectorizeInstructions(A
   }
 
   bool Changed = false;
-  SmallPtrSet<Value *, 16> VectorizedValues;
+  SmallPtrSet<Value *, 16> InstructionsProcessed;
 
   for (int Head : Heads) {
-    if (Tails.count(Head))
+    if (InstructionsProcessed.count(Instrs[Head]))
+      continue;
+    bool longerChainExists = false;
+    for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
+      if (Head == Tails[TIt] &&
+          !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
+        longerChainExists = true;
+        break;
+      }
+    if (longerChainExists)
       continue;
 
     // We found an instr that starts a chain. Now follow the chain and try to
@@ -625,7 +641,7 @@ bool Vectorizer::vectorizeInstructions(A
     SmallVector<Value *, 16> Operands;
     int I = Head;
     while (I != -1 && (Tails.count(I) || Heads.count(I))) {
-      if (VectorizedValues.count(Instrs[I]))
+      if (InstructionsProcessed.count(Instrs[I]))
         break;
 
       Operands.push_back(Instrs[I]);
@@ -634,20 +650,18 @@ bool Vectorizer::vectorizeInstructions(A
 
     bool Vectorized = false;
     if (isa<LoadInst>(*Operands.begin()))
-      Vectorized = vectorizeLoadChain(Operands);
+      Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
     else
-      Vectorized = vectorizeStoreChain(Operands);
+      Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
 
-    // Mark the vectorized instructions so that we don't vectorize them again.
-    if (Vectorized)
-      VectorizedValues.insert(Operands.begin(), Operands.end());
     Changed |= Vectorized;
   }
 
   return Changed;
 }
 
-bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
+bool Vectorizer::vectorizeStoreChain(
+    ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
   StoreInst *S0 = cast<StoreInst>(Chain[0]);
 
   // If the vector has an int element, default to int for the whole load.
@@ -670,8 +684,28 @@ bool Vectorizer::vectorizeStoreChain(Arr
   unsigned VF = VecRegSize / Sz;
   unsigned ChainSize = Chain.size();
 
-  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
+  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
+    InstructionsProcessed->insert(Chain.begin(), Chain.end());
+    return false;
+  }
+
+  BasicBlock::iterator First, Last;
+  std::tie(First, Last) = getBoundaryInstrs(Chain);
+  unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
+  if (StopChain == 0) {
+    // There exists a side effect instruction, no vectorization possible.
+    InstructionsProcessed->insert(Chain.begin(), Chain.end());
     return false;
+  }
+  if (StopChain == 1) {
+    // Failed after the first instruction. Discard it and try the smaller chain.
+    InstructionsProcessed->insert(Chain.front());
+    return false;
+  }
+
+  // Update Chain to the valid vectorizable subchain.
+  Chain = Chain.slice(0, StopChain);
+  ChainSize = Chain.size();
 
   // Store size should be 1B, 2B or multiple of 4B.
   // TODO: Target hook for size constraint?
@@ -680,11 +714,12 @@ bool Vectorizer::vectorizeStoreChain(Arr
     DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
                     "or multiple of 4B. Splitting.\n");
     if (SzInBytes == 3)
-      return vectorizeStoreChain(Chain.slice(0, ChainSize - 1));
+      return vectorizeStoreChain(Chain.slice(0, ChainSize - 1),
+                                 InstructionsProcessed);
 
     auto Chains = splitOddVectorElts(Chain, Sz);
-    return vectorizeStoreChain(Chains.first) |
-           vectorizeStoreChain(Chains.second);
+    return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
+           vectorizeStoreChain(Chains.second, InstructionsProcessed);
   }
 
   VectorType *VecTy;
@@ -700,8 +735,8 @@ bool Vectorizer::vectorizeStoreChain(Arr
   if (ChainSize > VF) {
     DEBUG(dbgs() << "LSV: Vector factor is too big."
                     " Creating two separate arrays.\n");
-    return vectorizeStoreChain(Chain.slice(0, VF)) |
-           vectorizeStoreChain(Chain.slice(VF));
+    return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) |
+           vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed);
   }
 
   DEBUG({
@@ -710,6 +745,10 @@ bool Vectorizer::vectorizeStoreChain(Arr
       V->dump();
   });
 
+  // We won't try again to vectorize the elements of the chain, regardless of
+  // whether we succeed below.
+  InstructionsProcessed->insert(Chain.begin(), Chain.end());
+
   // Check alignment restrictions.
   unsigned Alignment = getAlignment(S0);
 
@@ -729,12 +768,6 @@ bool Vectorizer::vectorizeStoreChain(Arr
     }
   }
 
-  BasicBlock::iterator First, Last;
-  std::tie(First, Last) = getBoundaryInstrs(Chain);
-
-  if (!isVectorizable(Chain, First, Last))
-    return false;
-
   // Set insert point.
   Builder.SetInsertPoint(&*Last);
 
@@ -782,7 +815,8 @@ bool Vectorizer::vectorizeStoreChain(Arr
   return true;
 }
 
-bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
+bool Vectorizer::vectorizeLoadChain(
+    ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
   LoadInst *L0 = cast<LoadInst>(Chain[0]);
 
   // If the vector has an int element, default to int for the whole load.
@@ -805,8 +839,28 @@ bool Vectorizer::vectorizeLoadChain(Arra
   unsigned VF = VecRegSize / Sz;
   unsigned ChainSize = Chain.size();
 
-  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
+  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
+    InstructionsProcessed->insert(Chain.begin(), Chain.end());
+    return false;
+  }
+
+  BasicBlock::iterator First, Last;
+  std::tie(First, Last) = getBoundaryInstrs(Chain);
+  unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
+  if (StopChain == 0) {
+    // There exists a side effect instruction, no vectorization possible.
+    InstructionsProcessed->insert(Chain.begin(), Chain.end());
+    return false;
+  }
+  if (StopChain == 1) {
+    // Failed after the first instruction. Discard it and try the smaller chain.
+    InstructionsProcessed->insert(Chain.front());
     return false;
+  }
+
+  // Update Chain to the valid vectorizable subchain.
+  Chain = Chain.slice(0, StopChain);
+  ChainSize = Chain.size();
 
   // Load size should be 1B, 2B or multiple of 4B.
   // TODO: Should size constraint be a target hook?
@@ -815,9 +869,11 @@ bool Vectorizer::vectorizeLoadChain(Arra
     DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
                     "or multiple of 4B. Splitting.\n");
     if (SzInBytes == 3)
-      return vectorizeLoadChain(Chain.slice(0, ChainSize - 1));
+      return vectorizeLoadChain(Chain.slice(0, ChainSize - 1),
+                                InstructionsProcessed);
     auto Chains = splitOddVectorElts(Chain, Sz);
-    return vectorizeLoadChain(Chains.first) | vectorizeLoadChain(Chains.second);
+    return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
+           vectorizeLoadChain(Chains.second, InstructionsProcessed);
   }
 
   VectorType *VecTy;
@@ -833,10 +889,14 @@ bool Vectorizer::vectorizeLoadChain(Arra
   if (ChainSize > VF) {
     DEBUG(dbgs() << "LSV: Vector factor is too big. "
                     "Creating two separate arrays.\n");
-    return vectorizeLoadChain(Chain.slice(0, VF)) |
-           vectorizeLoadChain(Chain.slice(VF));
+    return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) |
+           vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed);
   }
 
+  // We won't try again to vectorize the elements of the chain, regardless of
+  // whether we succeed below.
+  InstructionsProcessed->insert(Chain.begin(), Chain.end());
+
   // Check alignment restrictions.
   unsigned Alignment = getAlignment(L0);
 
@@ -862,12 +922,6 @@ bool Vectorizer::vectorizeLoadChain(Arra
       V->dump();
   });
 
-  BasicBlock::iterator First, Last;
-  std::tie(First, Last) = getBoundaryInstrs(Chain);
-
-  if (!isVectorizable(Chain, First, Last))
-    return false;
-
   // Set insert point.
   Builder.SetInsertPoint(&*First);
 

Modified: llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll?rev=275317&r1=275316&r2=275317&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll (original)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll Wed Jul 13 16:20:01 2016
@@ -48,8 +48,7 @@ define void @interleave_3L_2S_1L(i32* no
 ; CHECK-LABEL: @chain_suffix(
 ; CHECK: load i32
 ; CHECK: store <2 x i32>
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <2 x i32>
 define void @chain_suffix(i32* noalias %ptr) {
   %next.gep = getelementptr i32, i32* %ptr, i64 0
   %next.gep1 = getelementptr i32, i32* %ptr, i64 1
@@ -66,12 +65,9 @@ define void @chain_suffix(i32* noalias %
 
 
 ; CHECK-LABEL: @chain_prefix_suffix(
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <2 x i32>
 ; CHECK: store <2 x i32>
-; CHECK: load i32
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <3 x i32>
 define void  @chain_prefix_suffix(i32* noalias %ptr) {
   %next.gep = getelementptr i32, i32* %ptr, i64 0
   %next.gep1 = getelementptr i32, i32* %ptr, i64 1




More information about the llvm-commits mailing list