[llvm] r275317 - Extended LoadStoreVectorizer to vectorize subchains.
Alina Sbirlea via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 13 14:20:01 PDT 2016
Author: asbirlea
Date: Wed Jul 13 16:20:01 2016
New Revision: 275317
URL: http://llvm.org/viewvc/llvm-project?rev=275317&view=rev
Log:
Extended LoadStoreVectorizer to vectorize subchains.
Summary:
LSV used to abort vectorizing a chain for interleaved load/store accesses that alias.
Now a valid prefix of the chain can be vectorized: only that prefix is marked as processed, and vectorization is then retried on the remaining chain.
Reviewers: llvm-commits, jlebar, arsenm
Subscribers: mzolotukhin
Differential Revision: http://reviews.llvm.org/D22119
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp?rev=275317&r1=275316&r2=275317&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Wed Jul 13 16:20:01 2016
@@ -106,11 +106,13 @@ private:
std::pair<ArrayRef<Value *>, ArrayRef<Value *>>
splitOddVectorElts(ArrayRef<Value *> Chain, unsigned ElementSizeBits);
- /// Checks if there are any instructions which may affect the memory accessed
- /// in the chain between \p From and \p To. The elements of \p Chain should be
- /// all loads or all stores.
- bool isVectorizable(ArrayRef<Value *> Chain, BasicBlock::iterator From,
- BasicBlock::iterator To);
+ /// Checks for instructions which may affect the memory accessed
+ /// in the chain between \p From and \p To. Returns Index, where
+ /// \p Chain[0, Index) is the largest vectorizable chain prefix.
+ /// The elements of \p Chain should be all loads or all stores.
+ unsigned getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
+ BasicBlock::iterator From,
+ BasicBlock::iterator To);
/// Collects load and store instructions to vectorize.
void collectInstructions(BasicBlock *BB);
@@ -123,10 +125,12 @@ private:
bool vectorizeInstructions(ArrayRef<Value *> Instrs);
/// Vectorizes the load instructions in Chain.
- bool vectorizeLoadChain(ArrayRef<Value *> Chain);
+ bool vectorizeLoadChain(ArrayRef<Value *> Chain,
+ SmallPtrSet<Value *, 16> *InstructionsProcessed);
/// Vectorizes the store instructions in Chain.
- bool vectorizeStoreChain(ArrayRef<Value *> Chain);
+ bool vectorizeStoreChain(ArrayRef<Value *> Chain,
+ SmallPtrSet<Value *, 16> *InstructionsProcessed);
/// Check if this load/store access is misaligned accesses
bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
@@ -421,50 +425,53 @@ Vectorizer::splitOddVectorElts(ArrayRef<
return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
}
-bool Vectorizer::isVectorizable(ArrayRef<Value *> Chain,
- BasicBlock::iterator From,
- BasicBlock::iterator To) {
+unsigned Vectorizer::getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
+ BasicBlock::iterator From,
+ BasicBlock::iterator To) {
SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
- unsigned Idx = 0;
- for (auto I = From, E = To; I != E; ++I, ++Idx) {
+ unsigned InstrIdx = 0;
+ for (auto I = From; I != To; ++I, ++InstrIdx) {
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
if (!is_contained(Chain, &*I))
- MemoryInstrs.push_back({&*I, Idx});
+ MemoryInstrs.push_back({&*I, InstrIdx});
else
- ChainInstrs.push_back({&*I, Idx});
+ ChainInstrs.push_back({&*I, InstrIdx});
} else if (I->mayHaveSideEffects()) {
DEBUG(dbgs() << "LSV: Found side-effecting operation: " << *I << '\n');
- return false;
+ return 0;
}
}
assert(Chain.size() == ChainInstrs.size() &&
"All instructions in the Chain must exist in [From, To).");
- for (auto EntryMem : MemoryInstrs) {
- Value *V = EntryMem.first;
- unsigned VIdx = EntryMem.second;
- for (auto EntryChain : ChainInstrs) {
- Value *VV = EntryChain.first;
- unsigned VVIdx = EntryChain.second;
- if (isa<LoadInst>(V) && isa<LoadInst>(VV))
+ unsigned ChainIdx = 0;
+ for (auto EntryChain : ChainInstrs) {
+ Value *ChainInstrValue = EntryChain.first;
+ unsigned ChainInstrIdx = EntryChain.second;
+ for (auto EntryMem : MemoryInstrs) {
+ Value *MemInstrValue = EntryMem.first;
+ unsigned MemInstrIdx = EntryMem.second;
+ if (isa<LoadInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue))
continue;
// We can ignore the alias as long as the load comes before the store,
// because that means we won't be moving the load past the store to
// vectorize it (the vectorized load is inserted at the location of the
// first load in the chain).
- if (isa<StoreInst>(V) && isa<LoadInst>(VV) && VVIdx < VIdx)
+ if (isa<StoreInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue) &&
+ ChainInstrIdx < MemInstrIdx)
continue;
// Same case, but in reverse.
- if (isa<LoadInst>(V) && isa<StoreInst>(VV) && VVIdx > VIdx)
+ if (isa<LoadInst>(MemInstrValue) && isa<StoreInst>(ChainInstrValue) &&
+ ChainInstrIdx > MemInstrIdx)
continue;
- Instruction *M0 = cast<Instruction>(V);
- Instruction *M1 = cast<Instruction>(VV);
+ Instruction *M0 = cast<Instruction>(MemInstrValue);
+ Instruction *M1 = cast<Instruction>(ChainInstrValue);
if (!AA.isNoAlias(MemoryLocation::get(M0), MemoryLocation::get(M1))) {
DEBUG({
@@ -473,17 +480,17 @@ bool Vectorizer::isVectorizable(ArrayRef
dbgs() << "LSV: Found alias.\n"
" Aliasing instruction and pointer:\n"
- << *V << " aliases " << *Ptr0 << '\n'
+ << *MemInstrValue << " aliases " << *Ptr0 << '\n'
<< " Aliased instruction and pointer:\n"
- << *VV << " aliases " << *Ptr1 << '\n';
+ << *ChainInstrValue << " aliases " << *Ptr1 << '\n';
});
- return false;
+ return ChainIdx;
}
}
+ ChainIdx++;
}
-
- return true;
+ return Chain.size();
}
void Vectorizer::collectInstructions(BasicBlock *BB) {
@@ -614,10 +621,19 @@ bool Vectorizer::vectorizeInstructions(A
}
bool Changed = false;
- SmallPtrSet<Value *, 16> VectorizedValues;
+ SmallPtrSet<Value *, 16> InstructionsProcessed;
for (int Head : Heads) {
- if (Tails.count(Head))
+ if (InstructionsProcessed.count(Instrs[Head]))
+ continue;
+ bool longerChainExists = false;
+ for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
+ if (Head == Tails[TIt] &&
+ !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
+ longerChainExists = true;
+ break;
+ }
+ if (longerChainExists)
continue;
// We found an instr that starts a chain. Now follow the chain and try to
@@ -625,7 +641,7 @@ bool Vectorizer::vectorizeInstructions(A
SmallVector<Value *, 16> Operands;
int I = Head;
while (I != -1 && (Tails.count(I) || Heads.count(I))) {
- if (VectorizedValues.count(Instrs[I]))
+ if (InstructionsProcessed.count(Instrs[I]))
break;
Operands.push_back(Instrs[I]);
@@ -634,20 +650,18 @@ bool Vectorizer::vectorizeInstructions(A
bool Vectorized = false;
if (isa<LoadInst>(*Operands.begin()))
- Vectorized = vectorizeLoadChain(Operands);
+ Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
else
- Vectorized = vectorizeStoreChain(Operands);
+ Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
- // Mark the vectorized instructions so that we don't vectorize them again.
- if (Vectorized)
- VectorizedValues.insert(Operands.begin(), Operands.end());
Changed |= Vectorized;
}
return Changed;
}
-bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
+bool Vectorizer::vectorizeStoreChain(
+ ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
StoreInst *S0 = cast<StoreInst>(Chain[0]);
// If the vector has an int element, default to int for the whole load.
@@ -670,8 +684,28 @@ bool Vectorizer::vectorizeStoreChain(Arr
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
- if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
+ if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
+ InstructionsProcessed->insert(Chain.begin(), Chain.end());
+ return false;
+ }
+
+ BasicBlock::iterator First, Last;
+ std::tie(First, Last) = getBoundaryInstrs(Chain);
+ unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
+ if (StopChain == 0) {
+ // There exists a side effect instruction, no vectorization possible.
+ InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
+ }
+ if (StopChain == 1) {
+ // Failed after the first instruction. Discard it and try the smaller chain.
+ InstructionsProcessed->insert(Chain.front());
+ return false;
+ }
+
+ // Update Chain to the valid vectorizable subchain.
+ Chain = Chain.slice(0, StopChain);
+ ChainSize = Chain.size();
// Store size should be 1B, 2B or multiple of 4B.
// TODO: Target hook for size constraint?
@@ -680,11 +714,12 @@ bool Vectorizer::vectorizeStoreChain(Arr
DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
"or multiple of 4B. Splitting.\n");
if (SzInBytes == 3)
- return vectorizeStoreChain(Chain.slice(0, ChainSize - 1));
+ return vectorizeStoreChain(Chain.slice(0, ChainSize - 1),
+ InstructionsProcessed);
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeStoreChain(Chains.first) |
- vectorizeStoreChain(Chains.second);
+ return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
+ vectorizeStoreChain(Chains.second, InstructionsProcessed);
}
VectorType *VecTy;
@@ -700,8 +735,8 @@ bool Vectorizer::vectorizeStoreChain(Arr
if (ChainSize > VF) {
DEBUG(dbgs() << "LSV: Vector factor is too big."
" Creating two separate arrays.\n");
- return vectorizeStoreChain(Chain.slice(0, VF)) |
- vectorizeStoreChain(Chain.slice(VF));
+ return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) |
+ vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed);
}
DEBUG({
@@ -710,6 +745,10 @@ bool Vectorizer::vectorizeStoreChain(Arr
V->dump();
});
+ // We won't try again to vectorize the elements of the chain, regardless of
+ // whether we succeed below.
+ InstructionsProcessed->insert(Chain.begin(), Chain.end());
+
// Check alignment restrictions.
unsigned Alignment = getAlignment(S0);
@@ -729,12 +768,6 @@ bool Vectorizer::vectorizeStoreChain(Arr
}
}
- BasicBlock::iterator First, Last;
- std::tie(First, Last) = getBoundaryInstrs(Chain);
-
- if (!isVectorizable(Chain, First, Last))
- return false;
-
// Set insert point.
Builder.SetInsertPoint(&*Last);
@@ -782,7 +815,8 @@ bool Vectorizer::vectorizeStoreChain(Arr
return true;
}
-bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
+bool Vectorizer::vectorizeLoadChain(
+ ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
LoadInst *L0 = cast<LoadInst>(Chain[0]);
// If the vector has an int element, default to int for the whole load.
@@ -805,8 +839,28 @@ bool Vectorizer::vectorizeLoadChain(Arra
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
- if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
+ if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
+ InstructionsProcessed->insert(Chain.begin(), Chain.end());
+ return false;
+ }
+
+ BasicBlock::iterator First, Last;
+ std::tie(First, Last) = getBoundaryInstrs(Chain);
+ unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
+ if (StopChain == 0) {
+ // There exists a side effect instruction, no vectorization possible.
+ InstructionsProcessed->insert(Chain.begin(), Chain.end());
+ return false;
+ }
+ if (StopChain == 1) {
+ // Failed after the first instruction. Discard it and try the smaller chain.
+ InstructionsProcessed->insert(Chain.front());
return false;
+ }
+
+ // Update Chain to the valid vectorizable subchain.
+ Chain = Chain.slice(0, StopChain);
+ ChainSize = Chain.size();
// Load size should be 1B, 2B or multiple of 4B.
// TODO: Should size constraint be a target hook?
@@ -815,9 +869,11 @@ bool Vectorizer::vectorizeLoadChain(Arra
DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
"or multiple of 4B. Splitting.\n");
if (SzInBytes == 3)
- return vectorizeLoadChain(Chain.slice(0, ChainSize - 1));
+ return vectorizeLoadChain(Chain.slice(0, ChainSize - 1),
+ InstructionsProcessed);
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeLoadChain(Chains.first) | vectorizeLoadChain(Chains.second);
+ return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
+ vectorizeLoadChain(Chains.second, InstructionsProcessed);
}
VectorType *VecTy;
@@ -833,10 +889,14 @@ bool Vectorizer::vectorizeLoadChain(Arra
if (ChainSize > VF) {
DEBUG(dbgs() << "LSV: Vector factor is too big. "
"Creating two separate arrays.\n");
- return vectorizeLoadChain(Chain.slice(0, VF)) |
- vectorizeLoadChain(Chain.slice(VF));
+ return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) |
+ vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed);
}
+ // We won't try again to vectorize the elements of the chain, regardless of
+ // whether we succeed below.
+ InstructionsProcessed->insert(Chain.begin(), Chain.end());
+
// Check alignment restrictions.
unsigned Alignment = getAlignment(L0);
@@ -862,12 +922,6 @@ bool Vectorizer::vectorizeLoadChain(Arra
V->dump();
});
- BasicBlock::iterator First, Last;
- std::tie(First, Last) = getBoundaryInstrs(Chain);
-
- if (!isVectorizable(Chain, First, Last))
- return false;
-
// Set insert point.
Builder.SetInsertPoint(&*First);
Modified: llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll?rev=275317&r1=275316&r2=275317&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll (original)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll Wed Jul 13 16:20:01 2016
@@ -48,8 +48,7 @@ define void @interleave_3L_2S_1L(i32* no
; CHECK-LABEL: @chain_suffix(
; CHECK: load i32
; CHECK: store <2 x i32>
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <2 x i32>
define void @chain_suffix(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
@@ -66,12 +65,9 @@ define void @chain_suffix(i32* noalias %
; CHECK-LABEL: @chain_prefix_suffix(
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
-; CHECK: load i32
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <3 x i32>
define void @chain_prefix_suffix(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
More information about the llvm-commits mailing list