[llvm] r276072 - [LSV] Don't assume that loads/stores appear in address order in the BB.
Justin Lebar via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 19 17:55:12 PDT 2016
Author: jlebar
Date: Tue Jul 19 19:55:12 2016
New Revision: 276072
URL: http://llvm.org/viewvc/llvm-project?rev=276072&view=rev
Log:
[LSV] Don't assume that loads/stores appear in address order in the BB.
Summary:
getVectorizablePrefix previously didn't work properly in the face of
aliasing loads/stores. It unwittingly assumed that the loads/stores
appeared in the BB in address order. If they didn't, it could return a
prefix whose vectorization would move a load or store past an aliasing
access. (A simplified sketch of the fixed approach follows this log
message.)
Reviewers: asbirlea, tstellarAMD
Subscribers: arsenm, llvm-commits, mzolotukhin
Differential Revision: https://reviews.llvm.org/D22535
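
For readers skimming the diff below, here is a minimal, self-contained
sketch of the fixed two-phase approach, written against plain C++
standard containers instead of the LLVM types. Everything in it (the
Instr struct, mayAlias, findVectorizablePrefix) is an illustrative
stand-in, not the actual LSV code: phase 1 walks the chain's
instructions in BB order until one of them aliases another memory
instruction in the region, and phase 2 then takes the longest prefix of
the address-ordered chain whose elements all survived phase 1.

// Minimal sketch of the two-phase prefix computation. All names here are
// illustrative stand-ins for the LLVM types and the AliasAnalysis query.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct Instr {
  int BBPos;    // position of the instruction within the basic block
  long Addr;    // address the load/store accesses (model only)
  bool IsStore; // false => load
};

// Stand-in for the AliasAnalysis query; in this model two accesses
// "alias" iff they touch the same address.
static bool mayAlias(const Instr &A, const Instr &B) {
  return A.Addr == B.Addr;
}

// Chain is in *address* order; ChainInstrs and MemInstrs are in *BB* order.
static std::vector<const Instr *>
findVectorizablePrefix(const std::vector<const Instr *> &Chain,
                       const std::vector<const Instr *> &ChainInstrs,
                       const std::vector<const Instr *> &MemInstrs) {
  // Phase 1: walk the chain's instructions in BB order and count how many
  // are alias-safe with respect to the other memory instructions.
  std::size_t SafeLen = 0;
  for (; SafeLen != ChainInstrs.size(); ++SafeLen) {
    const Instr *CI = ChainInstrs[SafeLen];
    bool AliasFound = false;
    for (const Instr *MI : MemInstrs) {
      if (!CI->IsStore && !MI->IsStore)
        continue; // two loads never conflict
      // A load that stays ahead of a later store (and vice versa) is fine,
      // because vectorization will not move it past that store.
      if (!CI->IsStore && MI->IsStore && CI->BBPos < MI->BBPos)
        continue;
      if (CI->IsStore && !MI->IsStore && CI->BBPos > MI->BBPos)
        continue;
      if (mayAlias(*CI, *MI)) {
        AliasFound = true;
        break;
      }
    }
    if (AliasFound)
      break;
  }

  // Phase 2: the result is the longest prefix of the *address-ordered*
  // Chain whose elements all lie in ChainInstrs[0, SafeLen), i.e. among
  // the BB-order instructions that survived phase 1.
  std::size_t Len = 0;
  for (; Len != Chain.size(); ++Len) {
    const Instr *V = Chain[Len];
    bool Safe = std::any_of(ChainInstrs.begin(),
                            ChainInstrs.begin() + SafeLen,
                            [V](const Instr *CI) { return CI == V; });
    if (!Safe)
      break;
  }
  return std::vector<const Instr *>(Chain.begin(), Chain.begin() + Len);
}

int main() {
  // Roughly the shape of the new test below: stores to a+3, a+1, a+2
  // appear in the BB, then a load of a+2, then a store to a+0.
  Instr S3{0, 3, true}, S1{1, 1, true}, S2{2, 2, true}, L2{3, 2, false},
      S0{4, 0, true};
  std::vector<const Instr *> Chain = {&S0, &S1, &S2, &S3};       // address order
  std::vector<const Instr *> ChainInstrs = {&S3, &S1, &S2, &S0}; // BB order
  std::vector<const Instr *> MemInstrs = {&L2};                  // BB order

  // Under this model the returned prefix is empty: the store to a+2
  // aliases the load, and the address-ordered chain starts at a+0, which
  // only appears in the BB after the aliasing point.
  std::cout << findVectorizablePrefix(Chain, ChainInstrs, MemInstrs).size()
            << " vectorizable chain element(s)\n";
  return 0;
}

Run on that configuration, the model returns an empty prefix, which
matches the conservative outcome the new test below expects: no store is
sunk below the aliasing load.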
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp?rev=276072&r1=276071&r2=276072&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Tue Jul 19 19:55:12 2016
@@ -108,7 +108,8 @@ private:
/// intervening instructions which may affect the memory accessed by the
/// instructions within Chain.
///
- /// The elements of \p Chain must be all loads or all stores.
+ /// The elements of \p Chain must be all loads or all stores and must be in
+ /// address order.
ArrayRef<Value *> getVectorizablePrefix(ArrayRef<Value *> Chain);
/// Collects load and store instructions to vectorize.
@@ -424,6 +425,7 @@ Vectorizer::splitOddVectorElts(ArrayRef<
}
ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
+ // These are in BB order, unlike Chain, which is in address order.
SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
@@ -444,31 +446,34 @@ ArrayRef<Value *> Vectorizer::getVectori
assert(Chain.size() == ChainInstrs.size() &&
"All instrs in Chain must be within range getBoundaryInstrs(Chain).");
- unsigned ChainIdx = 0;
- for (auto EntryChain : ChainInstrs) {
- Value *ChainInstrValue = EntryChain.first;
- unsigned ChainInstrIdx = EntryChain.second;
+ // Loop until we find an instruction in ChainInstrs that we can't vectorize.
+ unsigned ChainInstrIdx, ChainInstrsLen;
+ for (ChainInstrIdx = 0, ChainInstrsLen = ChainInstrs.size();
+ ChainInstrIdx < ChainInstrsLen; ++ChainInstrIdx) {
+ Value *ChainInstr = ChainInstrs[ChainInstrIdx].first;
+ unsigned ChainInstrLoc = ChainInstrs[ChainInstrIdx].second;
+ bool AliasFound = false;
for (auto EntryMem : MemoryInstrs) {
- Value *MemInstrValue = EntryMem.first;
- unsigned MemInstrIdx = EntryMem.second;
- if (isa<LoadInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue))
+ Value *MemInstr = EntryMem.first;
+ unsigned MemInstrLoc = EntryMem.second;
+ if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
continue;
// We can ignore the alias as long as the load comes before the store,
// because that means we won't be moving the load past the store to
// vectorize it (the vectorized load is inserted at the location of the
// first load in the chain).
- if (isa<StoreInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue) &&
- ChainInstrIdx < MemInstrIdx)
+ if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
+ ChainInstrLoc < MemInstrLoc)
continue;
// Same case, but in reverse.
- if (isa<LoadInst>(MemInstrValue) && isa<StoreInst>(ChainInstrValue) &&
- ChainInstrIdx > MemInstrIdx)
+ if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
+ ChainInstrLoc > MemInstrLoc)
continue;
- Instruction *M0 = cast<Instruction>(MemInstrValue);
- Instruction *M1 = cast<Instruction>(ChainInstrValue);
+ Instruction *M0 = cast<Instruction>(MemInstr);
+ Instruction *M1 = cast<Instruction>(ChainInstr);
if (!AA.isNoAlias(MemoryLocation::get(M0), MemoryLocation::get(M1))) {
DEBUG({
@@ -477,19 +482,34 @@ ArrayRef<Value *> Vectorizer::getVectori
dbgs() << "LSV: Found alias:\n"
" Aliasing instruction and pointer:\n"
- << " " << *MemInstrValue << '\n'
+ << " " << *MemInstr << '\n'
<< " " << *Ptr0 << '\n'
<< " Aliased instruction and pointer:\n"
- << " " << *ChainInstrValue << '\n'
+ << " " << *ChainInstr << '\n'
<< " " << *Ptr1 << '\n';
});
-
- return Chain.slice(0, ChainIdx);
+ AliasFound = true;
+ break;
}
}
- ChainIdx++;
+ if (AliasFound)
+ break;
+ }
+
+ // Find the largest prefix of Chain whose elements are all in
+ // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
+ // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
+ // order.)
+ auto VectorizableChainInstrs =
+ makeArrayRef(ChainInstrs.data(), ChainInstrIdx);
+ unsigned ChainIdx, ChainLen;
+ for (ChainIdx = 0, ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
+ Value *V = Chain[ChainIdx];
+ if (!any_of(VectorizableChainInstrs,
+ [V](std::pair<Value *, unsigned> CI) { return V == CI.first; }))
+ break;
}
- return Chain;
+ return Chain.slice(0, ChainIdx);
}
std::pair<ValueListMap, ValueListMap>
Modified: llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll?rev=276072&r1=276071&r2=276072&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll (original)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll Tue Jul 19 19:55:12 2016
@@ -86,4 +86,34 @@ define float @insert_store_point_alias(f
ret float %x
}
+; Here we have four stores, with an aliasing load before the last one. We
+; could vectorize two of the stores before the load (although we currently
+; don't), but the important thing is that we *don't* sink the store to
+; a[idx + 1] below the load.
+;
+; CHECK-LABEL: @insert_store_point_alias_ooo
+; CHECK: store float
+; CHECK-SAME: %a.idx.3
+; CHECK: store float
+; CHECK-SAME: %a.idx.1
+; CHECK: store float
+; CHECK-SAME: %a.idx.2
+; CHECK: load float, float addrspace(1)* %a.idx.2
+; CHECK: store float
+; CHECK-SAME: %a.idx
+define float @insert_store_point_alias_ooo(float addrspace(1)* nocapture %a, i64 %idx) {
+ %a.idx = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
+ %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a.idx, i64 1
+ %a.idx.2 = getelementptr inbounds float, float addrspace(1)* %a.idx.1, i64 1
+ %a.idx.3 = getelementptr inbounds float, float addrspace(1)* %a.idx.2, i64 1
+
+ store float 0.0, float addrspace(1)* %a.idx.3, align 4
+ store float 0.0, float addrspace(1)* %a.idx.1, align 4
+ store float 0.0, float addrspace(1)* %a.idx.2, align 4
+ %x = load float, float addrspace(1)* %a.idx.2, align 4
+ store float 0.0, float addrspace(1)* %a.idx, align 4
+
+ ret float %x
+}
+
attributes #0 = { nounwind }