[llvm] r276056 - [LSV] Insert stores at the right point.
Justin Lebar via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 19 16:19:21 PDT 2016
Author: jlebar
Date: Tue Jul 19 18:19:20 2016
New Revision: 276056
URL: http://llvm.org/viewvc/llvm-project?rev=276056&view=rev
Log:
[LSV] Insert stores at the right point.
Summary:
Previously, the insertion point for stores was the last instruction in
Chain *before calling getVectorizablePrefixEndIdx*. Thus if
getVectorizablePrefixEndIdx didn't return Chain.size(), we still would
insert at the last instruction in Chain.
This patch changes our internal API a bit in an attempt to make it less
prone to this sort of error. As a result, we end up recalculating the
Chain's boundary instructions, but I think worrying about the speed hit
of this is a premature optimization right now.
Reviewers: asbirlea, tstellarAMD
Subscribers: mzolotukhin, arsenm, llvm-commits
Differential Revision: https://reviews.llvm.org/D22534
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp?rev=276056&r1=276055&r2=276056&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Tue Jul 19 18:19:20 2016
@@ -104,13 +104,12 @@ private:
std::pair<ArrayRef<Value *>, ArrayRef<Value *>>
splitOddVectorElts(ArrayRef<Value *> Chain, unsigned ElementSizeBits);
- /// Checks for instructions which may affect the memory accessed
- /// in the chain between \p From and \p To. Returns Index, where
- /// \p Chain[0, Index) is the largest vectorizable chain prefix.
- /// The elements of \p Chain should be all loads or all stores.
- unsigned getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
- BasicBlock::iterator From,
- BasicBlock::iterator To);
+ /// Finds the largest prefix of Chain that's vectorizable, checking for
+ /// intervening instructions which may affect the memory accessed by the
+ /// instructions within Chain.
+ ///
+ /// The elements of \p Chain must be all loads or all stores.
+ ArrayRef<Value *> getVectorizablePrefix(ArrayRef<Value *> Chain);
/// Collects load and store instructions to vectorize.
std::pair<ValueListMap, ValueListMap> collectInstructions(BasicBlock *BB);
@@ -424,14 +423,12 @@ Vectorizer::splitOddVectorElts(ArrayRef<
return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
}
-unsigned Vectorizer::getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
- BasicBlock::iterator From,
- BasicBlock::iterator To) {
+ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
unsigned InstrIdx = 0;
- for (Instruction &I : make_range(From, To)) {
+ for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
++InstrIdx;
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
if (!is_contained(Chain, &I))
@@ -445,7 +442,7 @@ unsigned Vectorizer::getVectorizablePref
}
assert(Chain.size() == ChainInstrs.size() &&
- "All instructions in the Chain must exist in [From, To).");
+ "All instrs in Chain must be within range getBoundaryInstrs(Chain).");
unsigned ChainIdx = 0;
for (auto EntryChain : ChainInstrs) {
@@ -487,12 +484,12 @@ unsigned Vectorizer::getVectorizablePref
<< " " << *Ptr1 << '\n';
});
- return ChainIdx;
+ return Chain.slice(0, ChainIdx);
}
}
ChainIdx++;
}
- return Chain.size();
+ return Chain;
}
std::pair<ValueListMap, ValueListMap>
@@ -694,22 +691,20 @@ bool Vectorizer::vectorizeStoreChain(
return false;
}
- BasicBlock::iterator First, Last;
- std::tie(First, Last) = getBoundaryInstrs(Chain);
- unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
- if (StopChain == 0) {
+ ArrayRef<Value *> NewChain = getVectorizablePrefix(Chain);
+ if (NewChain.empty()) {
// There exists a side effect instruction, no vectorization possible.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
- if (StopChain == 1) {
+ if (NewChain.size() == 1) {
// Failed after the first instruction. Discard it and try the smaller chain.
- InstructionsProcessed->insert(Chain.front());
+ InstructionsProcessed->insert(NewChain.front());
return false;
}
// Update Chain to the valid vectorizable subchain.
- Chain = Chain.slice(0, StopChain);
+ Chain = NewChain;
ChainSize = Chain.size();
// Store size should be 1B, 2B or multiple of 4B.
@@ -773,7 +768,8 @@ bool Vectorizer::vectorizeStoreChain(
}
}
- // Set insert point.
+ BasicBlock::iterator First, Last;
+ std::tie(First, Last) = getBoundaryInstrs(Chain);
Builder.SetInsertPoint(&*Last);
Value *Vec = UndefValue::get(VecTy);
@@ -849,22 +845,20 @@ bool Vectorizer::vectorizeLoadChain(
return false;
}
- BasicBlock::iterator First, Last;
- std::tie(First, Last) = getBoundaryInstrs(Chain);
- unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
- if (StopChain == 0) {
+ ArrayRef<Value *> NewChain = getVectorizablePrefix(Chain);
+ if (NewChain.empty()) {
// There exists a side effect instruction, no vectorization possible.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
- if (StopChain == 1) {
+ if (NewChain.size() == 1) {
// Failed after the first instruction. Discard it and try the smaller chain.
- InstructionsProcessed->insert(Chain.front());
+ InstructionsProcessed->insert(NewChain.front());
return false;
}
// Update Chain to the valid vectorizable subchain.
- Chain = Chain.slice(0, StopChain);
+ Chain = NewChain;
ChainSize = Chain.size();
// Load size should be 1B, 2B or multiple of 4B.
@@ -927,7 +921,11 @@ bool Vectorizer::vectorizeLoadChain(
V->dump();
});
- // Set insert point.
+ // getVectorizablePrefix already computed getBoundaryInstrs. The value of
+ // Last may have changed since then, but the value of First won't have. If it
+ // matters, we could compute getBoundaryInstrs only once and reuse it here.
+ BasicBlock::iterator First, Last;
+ std::tie(First, Last) = getBoundaryInstrs(Chain);
Builder.SetInsertPoint(&*First);
Value *Bitcast =
Modified: llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll?rev=276056&r1=276055&r2=276056&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll (original)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll Tue Jul 19 18:19:20 2016
@@ -2,8 +2,9 @@
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-; Check relative position of the inserted vector load relative to the existing
-; adds. Vectorized loads should be inserted at the position of the first load.
+; Check position of the inserted vector load/store. Vectorized loads should be
+; inserted at the position of the first load in the chain, and stores should be
+; inserted at the position of the last store.
; CHECK-LABEL: @insert_load_point(
; CHECK: %z = add i32 %x, 4
@@ -59,4 +60,30 @@ entry:
ret void
}
+; Here we have four stores, with an aliasing load before the last one. We can
+; vectorize the first two stores as <2 x float>, but this vectorized store must
+; be inserted at the location of the second scalar store, not the fourth one.
+;
+; CHECK-LABEL: @insert_store_point_alias
+; CHECK: store <2 x float>
+; CHECK: store float
+; CHECK-SAME: %a.idx.2
+; CHECK: load float, float addrspace(1)* %a.idx.2
+; CHECK: store float
+; CHECK-SAME: %a.idx.3
+define float @insert_store_point_alias(float addrspace(1)* nocapture %a, i64 %idx) {
+ %a.idx = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
+ %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a.idx, i64 1
+ %a.idx.2 = getelementptr inbounds float, float addrspace(1)* %a.idx.1, i64 1
+ %a.idx.3 = getelementptr inbounds float, float addrspace(1)* %a.idx.2, i64 1
+
+ store float 0.0, float addrspace(1)* %a.idx, align 4
+ store float 0.0, float addrspace(1)* %a.idx.1, align 4
+ store float 0.0, float addrspace(1)* %a.idx.2, align 4
+ %x = load float, float addrspace(1)* %a.idx.2, align 4
+ store float 0.0, float addrspace(1)* %a.idx.3, align 4
+
+ ret float %x
+}
+
attributes #0 = { nounwind }
More information about the llvm-commits
mailing list