[llvm-commits] [llvm] r159332 - /llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
Hal Finkel
hfinkel at anl.gov
Wed Jun 27 22:42:45 PDT 2012
Author: hfinkel
Date: Thu Jun 28 00:42:45 2012
New Revision: 159332
URL: http://llvm.org/viewvc/llvm-project?rev=159332&view=rev
Log:
Precompute SCEV pointer analysis prior to instruction fusion in BBVectorize.
When both a load/store and its address computation are being vectorized, it can
happen that the address-computation vectorization destroys SCEV's ability
to analyize the relative pointer offsets. As a result (like with the aliasing
analysis info), we need to precompute the necessary information prior to
instruction fusing.
This was found during stress testing (running through the test suite with a very
low required chain length); unfortunately, I don't have a small test case.
Modified:
llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp?rev=159332&r1=159331&r2=159332&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp Thu Jun 28 00:42:45 2012
@@ -277,7 +277,7 @@
bool UseCycleCheck);
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool &FlipMemInputs);
+ Instruction *J, unsigned o, bool FlipMemInputs);
void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
unsigned MaskOffset, unsigned NumInElem,
@@ -297,12 +297,12 @@
void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
- bool &FlipMemInputs);
+ bool FlipMemInputs);
void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
Instruction *J, Instruction *K,
Instruction *&InsertionPt, Instruction *&K1,
- Instruction *&K2, bool &FlipMemInputs);
+ Instruction *&K2, bool FlipMemInputs);
void collectPairLoadMoveSet(BasicBlock &BB,
DenseMap<Value *, Value *> &ChosenPairs,
@@ -314,6 +314,10 @@
DenseMap<Value *, Value *> &ChosenPairs,
std::multimap<Value *, Value *> &LoadMoveSet);
+ void collectPtrInfo(std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<Value *> &LowPtrInsts);
+
bool canMoveUsesOfIAfterJ(BasicBlock &BB,
std::multimap<Value *, Value *> &LoadMoveSet,
Instruction *I, Instruction *J);
@@ -1487,19 +1491,21 @@
// instruction that fuses I with J.
Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
Instruction *I, Instruction *J, unsigned o,
- bool &FlipMemInputs) {
+ bool FlipMemInputs) {
Value *IPtr, *JPtr;
unsigned IAlignment, JAlignment;
int64_t OffsetInElmts;
+
+ // Note: the analysis might fail here, that is why FlipMemInputs has
+ // been precomputed (OffsetInElmts must be unused here).
(void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
OffsetInElmts);
// The pointer value is taken to be the one with the lowest offset.
Value *VPtr;
- if (OffsetInElmts > 0) {
+ if (!FlipMemInputs) {
VPtr = IPtr;
} else {
- FlipMemInputs = true;
VPtr = JPtr;
}
@@ -1970,8 +1976,7 @@
void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
Instruction *I, Instruction *J,
SmallVector<Value *, 3> &ReplacedOperands,
- bool &FlipMemInputs) {
- FlipMemInputs = false;
+ bool FlipMemInputs) {
unsigned NumOperands = I->getNumOperands();
for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
@@ -2022,7 +2027,7 @@
Instruction *J, Instruction *K,
Instruction *&InsertionPt,
Instruction *&K1, Instruction *&K2,
- bool &FlipMemInputs) {
+ bool FlipMemInputs) {
if (isa<StoreInst>(I)) {
AA->replaceWithNewValue(I, K);
AA->replaceWithNewValue(J, K);
@@ -2176,6 +2181,36 @@
}
}
+ // As with the aliasing information, SCEV can also change because of
+ // vectorization. This information is used to compute relative pointer
+ // offsets; the necessary information will be cached here prior to
+ // fusion.
+ void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<Value *> &LowPtrInsts) {
+ for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
+ PIE = PairableInsts.end(); PI != PIE; ++PI) {
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
+ if (P == ChosenPairs.end()) continue;
+
+ Instruction *I = cast<Instruction>(P->first);
+ Instruction *J = cast<Instruction>(P->second);
+
+ if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+ continue;
+
+ Value *IPtr, *JPtr;
+ unsigned IAlignment, JAlignment;
+ int64_t OffsetInElmts;
+ if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ OffsetInElmts) || abs64(OffsetInElmts) != 1)
+ llvm_unreachable("Pre-fusion pointer analysis failed");
+
+ Value *LowPI = (OffsetInElmts > 0) ? I : J;
+ LowPtrInsts.insert(LowPI);
+ }
+ }
+
// When the first instruction in each pair is cloned, it will inherit its
// parent's metadata. This metadata must be combined with that of the other
// instruction in a safe way.
@@ -2227,6 +2262,9 @@
std::multimap<Value *, Value *> LoadMoveSet;
collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
+ DenseSet<Value *> LowPtrInsts;
+ collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts);
+
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
@@ -2266,7 +2304,10 @@
continue;
}
- bool FlipMemInputs;
+ bool FlipMemInputs = false;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end());
+
unsigned NumOperands = I->getNumOperands();
SmallVector<Value *, 3> ReplacedOperands(NumOperands);
getReplacementInputsForPair(Context, I, J, ReplacedOperands,
More information about the llvm-commits
mailing list