[llvm] r306338 - reverting 306331.
Ayal Zaks via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 26 15:26:54 PDT 2017
Author: ayalz
Date: Mon Jun 26 15:26:54 2017
New Revision: 306338
URL: http://llvm.org/viewvc/llvm-project?rev=306338&view=rev
Log:
reverting 306331.
Causes TBAA metadata to be generated on reverse shuffles; investigating.
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=306338&r1=306337&r2=306338&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Mon Jun 26 15:26:54 2017
@@ -532,34 +532,21 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
- /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a
- /// vector or scalar value on-demand if one is not yet available. When
- /// vectorizing a loop, we visit the definition of an instruction before its
- /// uses. When visiting the definition, we either vectorize or scalarize the
- /// instruction, creating an entry for it in the corresponding map. (In some
- /// cases, such as induction variables, we will create both vector and scalar
- /// entries.) Then, as we encounter uses of the definition, we derive values
- /// for each scalar or vector use unless such a value is already available.
- /// For example, if we scalarize a definition and one of its uses is vector,
- /// we build the required vector on-demand with an insertelement sequence
- /// when visiting the use. Otherwise, if the use is scalar, we can use the
- /// existing scalar definition.
- ///
- /// Return a value in the new loop corresponding to \p V from the original
- /// loop at unroll index \p Part. If the value has already been vectorized,
- /// the corresponding vector entry in VectorLoopValueMap is returned. If,
+ /// Return a constant reference to the VectorParts corresponding to \p V from
+ /// the original loop. If the value has already been vectorized, the
+ /// corresponding vector entry in VectorLoopValueMap is returned. If,
/// however, the value has a scalar entry in VectorLoopValueMap, we construct
- /// a new vector value on-demand by inserting the scalar values into a vector
+ /// new vector values on-demand by inserting the scalar values into vectors
/// with an insertelement sequence. If the value has been neither vectorized
/// nor scalarized, it must be loop invariant, so we simply broadcast the
- /// value into a vector.
- Value *getOrCreateVectorValue(Value *V, unsigned Part);
+ /// value into vectors.
+ const VectorParts &getVectorValue(Value *V);
/// Return a value in the new loop corresponding to \p V from the original
/// loop at unroll index \p Part and vector index \p Lane. If the value has
/// been vectorized but not scalarized, the necessary extractelement
/// instruction will be generated.
- Value *getOrCreateScalarValue(Value *V, unsigned Part, unsigned Lane);
+ Value *getScalarValue(Value *V, unsigned Part, unsigned Lane);
/// Try to vectorize the interleaved access group that \p Instr belongs to.
void vectorizeInterleaveGroup(Instruction *Instr);
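To make the interface change concrete, here is a standalone sketch (toy types, not LLVM code; the static storage scheme is purely illustrative) contrasting the per-part accessor this revert removes with the whole-entry accessor it restores:

#include <cassert>
#include <vector>

struct Value {};                          // stand-in for llvm::Value
using VectorParts = std::vector<Value *>; // one entry per unroll part

static const unsigned UF = 2;
static Value Storage[UF];

// Per-part style (the API this commit reverts).
Value *getOrCreateVectorValue(Value *, unsigned Part) {
  assert(Part < UF && "part out of range");
  return &Storage[Part];
}

// Whole-entry style (the API this commit restores).
const VectorParts &getVectorValue(Value *) {
  static VectorParts Parts = {&Storage[0], &Storage[1]};
  return Parts;
}

int main() {
  Value V;
  // Reverted API: one call per unroll part.
  for (unsigned Part = 0; Part < UF; ++Part)
    (void)getOrCreateVectorValue(&V, Part);
  // Restored API: one call, then index into the parts.
  const VectorParts &Parts = getVectorValue(&V);
  for (unsigned Part = 0; Part < UF; ++Part)
    (void)Parts[Part];
}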
@@ -614,103 +601,90 @@ protected:
/// UF x VF scalar values in the new loop. UF and VF are the unroll and
/// vectorization factors, respectively.
///
- /// Entries can be added to either map with setVectorValue and setScalarValue,
- /// which assert that an entry was not already added before. If an entry is to
- /// replace an existing one, call resetVectorValue. This is currently needed
- /// to modify the mapped values during "fix-up" operations that occur once the
- /// first phase of widening is complete. These operations include type
- /// truncation and the second phase of recurrence widening.
+ /// Entries can be added to either map with initVector and initScalar, which
+ /// initialize and return a constant reference to the new entry. If a
+ /// non-constant reference to a vector entry is required, getVector can be
+ /// used to retrieve a mutable entry. We currently directly modify the mapped
+ /// values during "fix-up" operations that occur once the first phase of
+ /// widening is complete. These operations include type truncation and the
+ /// second phase of recurrence widening.
///
- /// Entries from either map can be retrieved using the getVectorValue and
- /// getScalarValue functions, which assert that the desired value exists.
-
+ /// Otherwise, entries from either map should be accessed using the
+ /// getVectorValue or getScalarValue functions from InnerLoopVectorizer.
+ /// getVectorValue and getScalarValue coordinate to generate a vector or
+ /// scalar value on-demand if one is not yet available. When vectorizing a
+ /// loop, we visit the definition of an instruction before its uses. When
+ /// visiting the definition, we either vectorize or scalarize the
+ /// instruction, creating an entry for it in the corresponding map. (In some
+ /// cases, such as induction variables, we will create both vector and scalar
+ /// entries.) Then, as we encounter uses of the definition, we derive values
+ /// for each scalar or vector use unless such a value is already available.
+ /// For example, if we scalarize a definition and one of its uses is vector,
+ /// we build the required vector on-demand with an insertelement sequence
+ /// when visiting the use. Otherwise, if the use is scalar, we can use the
+ /// existing scalar definition.
struct ValueMap {
/// Construct an empty map with the given unroll and vectorization factors.
- ValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}
-
- /// \return True if the map has any vector entry for \p Key.
- bool hasAnyVectorValue(Value *Key) const {
- return VectorMapStorage.count(Key);
- }
-
- /// \return True if the map has a vector entry for \p Key and \p Part.
- bool hasVectorValue(Value *Key, unsigned Part) const {
- assert(Part < UF && "Queried Vector Part is too large.");
- if (!hasAnyVectorValue(Key))
- return false;
- const VectorParts &Entry = VectorMapStorage.find(Key)->second;
- assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
- return Entry[Part] != nullptr;
- }
-
- /// \return True if the map has any scalar entry for \p Key.
- bool hasAnyScalarValue(Value *Key) const {
- return ScalarMapStorage.count(Key);
- }
-
- /// \return True if the map has a scalar entry for \p Key, \p Part and
- /// \p Lane.
- bool hasScalarValue(Value *Key, unsigned Part, unsigned Lane) const {
- assert(Part < UF && "Queried Scalar Part is too large.");
- assert(Lane < VF && "Queried Scalar Lane is too large.");
- if (!hasAnyScalarValue(Key))
- return false;
- const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
- assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
- assert(Entry[Part].size() == VF && "ScalarParts has wrong dimensions.");
- return Entry[Part][Lane] != nullptr;
- }
-
- /// Retrieve the existing vector value that corresponds to \p Key and
- /// \p Part.
- Value *getVectorValue(Value *Key, unsigned Part) {
- assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
- return VectorMapStorage[Key][Part];
- }
-
- /// Retrieve the existing scalar value that corresponds to \p Key, \p Part
- /// and \p Lane.
- Value *getScalarValue(Value *Key, unsigned Part, unsigned Lane) {
- assert(hasScalarValue(Key, Part, Lane) && "Getting non-existent value.");
- return ScalarMapStorage[Key][Part][Lane];
- }
-
- /// Set a vector value associated with \p Key and \p Part. Assumes such a
- /// value is not already set. If it is, use resetVectorValue() instead.
- void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
- assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
- if (!VectorMapStorage.count(Key)) {
- VectorParts Entry(UF);
- VectorMapStorage[Key] = Entry;
- }
- VectorMapStorage[Key][Part] = Vector;
- }
-
- /// Set a scalar value associated with \p Key for \p Part and \p Lane.
- /// Assumes such a value is not already set.
- void setScalarValue(Value *Key, unsigned Part, unsigned Lane,
- Value *Scalar) {
- assert(!hasScalarValue(Key, Part, Lane) && "Scalar value already set");
- if (!ScalarMapStorage.count(Key)) {
- ScalarParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part].resize(VF, nullptr);
- // TODO: Consider storing uniform values only per-part, as they occupy
- // lane 0 only, keeping the other VF-1 redundant entries null.
- ScalarMapStorage[Key] = Entry;
- }
- ScalarMapStorage[Key][Part][Lane] = Scalar;
- }
-
- /// Reset the vector value associated with \p Key for the given \p Part.
- /// This function can be used to update values that have already been
- /// vectorized. This is the case for "fix-up" operations including type
- /// truncation and the second phase of recurrence vectorization.
- void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
- assert(hasVectorValue(Key, Part) && "Vector value not set for part");
- VectorMapStorage[Key][Part] = Vector;
- }
+ ValueMap(unsigned UnrollFactor, unsigned VecWidth)
+ : UF(UnrollFactor), VF(VecWidth) {
+ // The unroll and vectorization factors are only used in asserts builds
+ // to verify map entries are sized appropriately.
+ (void)UF;
+ (void)VF;
+ }
+
+ /// \return True if the map has a vector entry for \p Key.
+ bool hasVector(Value *Key) const { return VectorMapStorage.count(Key); }
+
+ /// \return True if the map has a scalar entry for \p Key.
+ bool hasScalar(Value *Key) const { return ScalarMapStorage.count(Key); }
+
+ /// \brief Map \p Key to the given VectorParts \p Entry, and return a
+ /// constant reference to the new vector map entry. The given key should
+ /// not already be in the map, and the given VectorParts should be
+ /// correctly sized for the current unroll factor.
+ const VectorParts &initVector(Value *Key, const VectorParts &Entry) {
+ assert(!hasVector(Key) && "Vector entry already initialized");
+ assert(Entry.size() == UF && "VectorParts has wrong dimensions");
+ VectorMapStorage[Key] = Entry;
+ return VectorMapStorage[Key];
+ }
+
+ /// \brief Map \p Key to the given ScalarParts \p Entry, and return a
+ /// constant reference to the new scalar map entry. The given key should
+ /// not already be in the map, and the given ScalarParts should be
+ /// correctly sized for the current unroll and vectorization factors.
+ const ScalarParts &initScalar(Value *Key, const ScalarParts &Entry) {
+ assert(!hasScalar(Key) && "Scalar entry already initialized");
+ assert(Entry.size() == UF &&
+ all_of(make_range(Entry.begin(), Entry.end()),
+ [&](const SmallVectorImpl<Value *> &Values) -> bool {
+ return Values.size() == VF;
+ }) &&
+ "ScalarParts has wrong dimensions");
+ ScalarMapStorage[Key] = Entry;
+ return ScalarMapStorage[Key];
+ }
+
+ /// \return A reference to the vector map entry corresponding to \p Key.
+ /// The key should already be in the map. This function should only be used
+ /// when it's necessary to update values that have already been vectorized.
+ /// This is the case for "fix-up" operations including type truncation and
+ /// the second phase of recurrence vectorization. If a non-const reference
+ /// isn't required, getVectorValue should be used instead.
+ VectorParts &getVector(Value *Key) {
+ assert(hasVector(Key) && "Vector entry not initialized");
+ return VectorMapStorage.find(Key)->second;
+ }
+
+ /// Retrieve an entry from the vector or scalar maps. The preferred way to
+ /// access an existing mapped entry is with getVectorValue or
+ /// getScalarValue from InnerLoopVectorizer. Until those functions can be
+ /// moved inside ValueMap, we have to declare them as friends.
+ friend const VectorParts &InnerLoopVectorizer::getVectorValue(Value *V);
+ friend Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
+ unsigned Lane);
private:
/// The unroll factor. Each entry in the vector map contains UF vector
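The comment block above describes the restored scheme: two parallel maps keyed by values from the original loop, with a vector entry holding UF parts and a scalar entry holding UF x VF lanes. A minimal standalone model of that layout and its init-once discipline (std containers and a stand-in Value type instead of LLVM's DenseMap and SmallVector; ToyValueMap is not the real class):

#include <cassert>
#include <map>
#include <vector>

struct Value {};                                       // stand-in for llvm::Value
using VectorParts = std::vector<Value *>;              // size UF
using ScalarParts = std::vector<std::vector<Value *>>; // UF x VF

class ToyValueMap {
  unsigned UF, VF;
  std::map<Value *, VectorParts> VectorMapStorage;
  std::map<Value *, ScalarParts> ScalarMapStorage;

public:
  ToyValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}

  bool hasVector(Value *Key) const { return VectorMapStorage.count(Key); }
  bool hasScalar(Value *Key) const { return ScalarMapStorage.count(Key); }

  // Initialize-once: asserts the key is new and the entry is fully sized.
  const VectorParts &initVector(Value *Key, const VectorParts &Entry) {
    assert(!hasVector(Key) && Entry.size() == UF);
    return VectorMapStorage[Key] = Entry;
  }

  const ScalarParts &initScalar(Value *Key, const ScalarParts &Entry) {
    assert(!hasScalar(Key) && Entry.size() == UF);
    for (auto &Lanes : Entry)
      assert(Lanes.size() == VF && "every part must have VF lanes");
    return ScalarMapStorage[Key] = Entry;
  }

  // Mutable access, reserved for fix-up passes (truncation, recurrences).
  VectorParts &getVector(Value *Key) {
    assert(hasVector(Key) && "entry not initialized");
    return VectorMapStorage.find(Key)->second;
  }
};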
@@ -2443,13 +2417,15 @@ void InnerLoopVectorizer::createVectorIn
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
&*LoopVectorBody->getFirstInsertionPt());
Instruction *LastInduction = VecInd;
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
- if (isa<TruncInst>(EntryVal))
- addMetadata(LastInduction, EntryVal);
+ Entry[Part] = LastInduction;
LastInduction = cast<Instruction>(addFastMathFlag(
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
}
+ VectorLoopValueMap.initVector(EntryVal, Entry);
+ if (isa<TruncInst>(EntryVal))
+ addMetadata(Entry, EntryVal);
// Move the last step to the end of the latch block. This ensures consistent
// placement of all induction updates.
@@ -2555,13 +2531,13 @@ void InnerLoopVectorizer::widenIntOrFpIn
// induction variable, and build the necessary step vectors.
if (!VectorizedIV) {
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *EntryPart =
+ VectorParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] =
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
- VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
- if (Trunc)
- addMetadata(EntryPart, Trunc);
- }
+ VectorLoopValueMap.initVector(EntryVal, Entry);
+ if (Trunc)
+ addMetadata(Entry, Trunc);
}
// If an induction variable is only used for counting loop iterations or
@@ -2661,14 +2637,17 @@ void InnerLoopVectorizer::buildScalarSte
Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF) ? 1 : VF;
// Compute the scalar steps and save the results in VectorLoopValueMap.
+ ScalarParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part].resize(VF);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane);
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
- VectorLoopValueMap.setScalarValue(EntryVal, Part, Lane, Add);
+ Entry[Part][Lane] = Add;
}
}
+ VectorLoopValueMap.initScalar(EntryVal, Entry);
}
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
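buildScalarSteps above illustrates the init-once flow this revert restores: every lane of a complete ScalarParts entry is filled first, and the entry is then published with a single initScalar call. A sketch of the idiom with stand-in types (MakeStep is hypothetical, standing in for the real step arithmetic):

#include <vector>

struct Value {};
using ScalarParts = std::vector<std::vector<Value *>>;

Value *MakeStep(unsigned /*Part*/, unsigned /*Lane*/) {
  static Value V; // stand-in for the generated add/mul instructions
  return &V;
}

ScalarParts buildEntry(unsigned UF, unsigned VF) {
  ScalarParts Entry(UF);
  for (unsigned Part = 0; Part < UF; ++Part) {
    Entry[Part].resize(VF);
    for (unsigned Lane = 0; Lane < VF; ++Lane)
      Entry[Part][Lane] = MakeStep(Part, Lane); // fill every lane first
  }
  return Entry; // then publish once: VectorLoopValueMap.initScalar(Key, Entry)
}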
@@ -2686,7 +2665,8 @@ bool LoopVectorizationLegality::isUnifor
return LAI->isUniform(V);
}
-Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
+const InnerLoopVectorizer::VectorParts &
+InnerLoopVectorizer::getVectorValue(Value *V) {
assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
assert(!V->getType()->isVoidTy() && "Type does not produce a value");
@@ -2695,16 +2675,17 @@ Value *InnerLoopVectorizer::getOrCreateV
if (Legal->hasStride(V))
V = ConstantInt::get(V->getType(), 1);
- // If we have a vector mapped to this value, return it.
- if (VectorLoopValueMap.hasVectorValue(V, Part))
- return VectorLoopValueMap.getVectorValue(V, Part);
+ // If we have this scalar in the map, return it.
+ if (VectorLoopValueMap.hasVector(V))
+ return VectorLoopValueMap.VectorMapStorage[V];
// If the value has not been vectorized, check if it has been scalarized
// instead. If it has been scalarized, and we actually need the value in
// vector form, we will construct the vector values on demand.
- if (VectorLoopValueMap.hasAnyScalarValue(V)) {
+ if (VectorLoopValueMap.hasScalar(V)) {
- Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, Part, 0);
+ // Initialize a new vector map entry.
+ VectorParts Entry(UF);
// If we've scalarized a value, that value should be an instruction.
auto *I = cast<Instruction>(V);
@@ -2712,8 +2693,9 @@ Value *InnerLoopVectorizer::getOrCreateV
// If we aren't vectorizing, we can just copy the scalar map values over to
// the vector map.
if (VF == 1) {
- VectorLoopValueMap.setVectorValue(V, Part, ScalarValue);
- return ScalarValue;
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = getScalarValue(V, Part, 0);
+ return VectorLoopValueMap.initVector(V, Entry);
}
// Get the last scalar instruction we generated for V. If the value is
@@ -2721,8 +2703,7 @@ Value *InnerLoopVectorizer::getOrCreateV
// of the last unroll iteration. Otherwise, the last instruction is the one
// we created for the last vector lane of the last unroll iteration.
unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1;
- auto *LastInst =
- cast<Instruction>(getOrCreateScalarValue(V, UF - 1, LastLane));
+ auto *LastInst = cast<Instruction>(getScalarValue(V, UF - 1, LastLane));
// Set the insert point after the last scalarized instruction. This ensures
// the insertelement sequence will directly follow the scalar definitions.
@@ -2736,50 +2717,52 @@ Value *InnerLoopVectorizer::getOrCreateV
// iteration. Otherwise, we construct the vector values using insertelement
// instructions. Since the resulting vectors are stored in
// VectorLoopValueMap, we will only generate the insertelements once.
- Value *VectorValue = nullptr;
- if (Cost->isUniformAfterVectorization(I, VF)) {
- VectorValue = getBroadcastInstrs(ScalarValue);
- } else {
- VectorValue = UndefValue::get(VectorType::get(V->getType(), VF));
- for (unsigned Lane = 0; Lane < VF; ++Lane)
- VectorValue = Builder.CreateInsertElement(
- VectorValue, getOrCreateScalarValue(V, Part, Lane),
- Builder.getInt32(Lane));
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *VectorValue = nullptr;
+ if (Cost->isUniformAfterVectorization(I, VF)) {
+ VectorValue = getBroadcastInstrs(getScalarValue(V, Part, 0));
+ } else {
+ VectorValue = UndefValue::get(VectorType::get(V->getType(), VF));
+ for (unsigned Lane = 0; Lane < VF; ++Lane)
+ VectorValue = Builder.CreateInsertElement(
+ VectorValue, getScalarValue(V, Part, Lane),
+ Builder.getInt32(Lane));
+ }
+ Entry[Part] = VectorValue;
}
- VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
Builder.restoreIP(OldIP);
- return VectorValue;
+ return VectorLoopValueMap.initVector(V, Entry);
}
// If this scalar is unknown, assume that it is a constant or that it is
// loop invariant. Broadcast V and save the value for future uses.
Value *B = getBroadcastInstrs(V);
- VectorLoopValueMap.setVectorValue(V, Part, B);
- return B;
+ return VectorLoopValueMap.initVector(V, VectorParts(UF, B));
}
-Value *InnerLoopVectorizer::getOrCreateScalarValue(Value *V, unsigned Part,
- unsigned Lane) {
+Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
+ unsigned Lane) {
// If the value is not an instruction contained in the loop, it should
// already be scalar.
if (OrigLoop->isLoopInvariant(V))
return V;
- assert(Lane > 0 ? !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
- : true && "Uniform values only have lane zero");
+ assert(Lane > 0 ?
+ !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
+ : true && "Uniform values only have lane zero");
// If the value from the original loop has not been vectorized, it is
// represented by UF x VF scalar values in the new loop. Return the requested
// scalar value.
- if (VectorLoopValueMap.hasScalarValue(V, Part, Lane))
- return VectorLoopValueMap.getScalarValue(V, Part, Lane);
+ if (VectorLoopValueMap.hasScalar(V))
+ return VectorLoopValueMap.ScalarMapStorage[V][Part][Lane];
// If the value has not been scalarized, get its entry in VectorLoopValueMap
// for the given unroll part. If this entry is not a vector type (i.e., the
// vectorization factor is one), there is no need to generate an
// extractelement instruction.
- auto *U = getOrCreateVectorValue(V, Part);
+ auto *U = getVectorValue(V)[Part];
if (!U->getType()->isVectorTy()) {
assert(VF == 1 && "Value not scalarized has non-vector type");
return U;
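For scalarized values, getVectorValue assembles each part's vector lane by lane, mirroring the insertelement chain the real code emits. A standalone sketch of that gather (plain containers instead of IR values; getScalarLane stands in for getScalarValue):

#include <vector>

using Scalar = int;
using Vec = std::vector<Scalar>;

Scalar getScalarLane(unsigned Part, unsigned Lane) {
  return static_cast<Scalar>(Part * 100 + Lane); // illustrative values
}

std::vector<Vec> gatherParts(unsigned UF, unsigned VF) {
  std::vector<Vec> Entry(UF);
  for (unsigned Part = 0; Part < UF; ++Part) {
    Vec V(VF);
    for (unsigned Lane = 0; Lane < VF; ++Lane)
      V[Lane] = getScalarLane(Part, Lane); // one "insertelement" per lane
    Entry[Part] = V;
  }
  return Entry; // published once via initVector in the real code
}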
@@ -2861,7 +2844,7 @@ void InnerLoopVectorizer::vectorizeInter
Index += (VF - 1) * Group->getFactor();
for (unsigned Part = 0; Part < UF; Part++) {
- Value *NewPtr = getOrCreateScalarValue(Ptr, Part, 0);
+ Value *NewPtr = getScalarValue(Ptr, Part, 0);
// Notice current instruction could be any index. Need to adjust the address
// to the member of index 0.
@@ -2904,6 +2887,7 @@ void InnerLoopVectorizer::vectorizeInter
if (!Member)
continue;
+ VectorParts Entry(UF);
Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF);
for (unsigned Part = 0; Part < UF; Part++) {
Value *StridedVec = Builder.CreateShuffleVector(
@@ -2915,11 +2899,10 @@ void InnerLoopVectorizer::vectorizeInter
StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy);
}
- if (Group->isReverse())
- StridedVec = reverseVector(StridedVec);
-
- VectorLoopValueMap.setVectorValue(Member, Part, StridedVec);
+ Entry[Part] =
+ Group->isReverse() ? reverseVector(StridedVec) : StridedVec;
}
+ VectorLoopValueMap.initVector(Member, Entry);
}
return;
}
@@ -2936,8 +2919,8 @@ void InnerLoopVectorizer::vectorizeInter
Instruction *Member = Group->getMember(i);
assert(Member && "Fail to get a member from an interleaved store group");
- Value *StoredVec = getOrCreateVectorValue(
- cast<StoreInst>(Member)->getValueOperand(), Part);
+ Value *StoredVec =
+ getVectorValue(cast<StoreInst>(Member)->getValueOperand())[Part];
if (Group->isReverse())
StoredVec = reverseVector(StoredVec);
@@ -2998,14 +2981,16 @@ void InnerLoopVectorizer::vectorizeMemor
bool CreateGatherScatter =
(Decision == LoopVectorizationCostModel::CM_GatherScatter);
- // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector
- // gather/scatter. Otherwise Decision should have been to Scalarize.
- assert((ConsecutiveStride || CreateGatherScatter) &&
- "The instruction should be scalarized");
+ VectorParts VectorGep;
// Handle consecutive loads/stores.
- if (ConsecutiveStride)
- Ptr = getOrCreateScalarValue(Ptr, 0, 0);
+ if (ConsecutiveStride) {
+ Ptr = getScalarValue(Ptr, 0, 0);
+ } else {
+ // At this point we should have the vector version of the GEP for Gather or Scatter
+ assert(CreateGatherScatter && "The instruction should be scalarized");
+ VectorGep = getVectorValue(Ptr);
+ }
VectorParts Mask = createBlockInMask(Instr->getParent());
// Handle Stores:
@@ -3013,15 +2998,16 @@ void InnerLoopVectorizer::vectorizeMemor
assert(!Legal->isUniform(SI->getPointerOperand()) &&
"We do not allow storing to uniform addresses");
setDebugLocFromInst(Builder, SI);
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't use a reference type for "StoredVal".
+ VectorParts StoredVal = getVectorValue(SI->getValueOperand());
for (unsigned Part = 0; Part < UF; ++Part) {
Instruction *NewSI = nullptr;
- Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(SI) ? Mask[Part] : nullptr;
- Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
- NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
- MaskPart);
+ NewSI = Builder.CreateMaskedScatter(StoredVal[Part], VectorGep[Part],
+ Alignment, MaskPart);
} else {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
@@ -3030,7 +3016,7 @@ void InnerLoopVectorizer::vectorizeMemor
if (Reverse) {
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
- StoredVal = reverseVector(StoredVal);
+ StoredVal[Part] = reverseVector(StoredVal[Part]);
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
PartPtr =
@@ -3044,10 +3030,11 @@ void InnerLoopVectorizer::vectorizeMemor
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
if (Legal->isMaskRequired(SI))
- NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+ NewSI = Builder.CreateMaskedStore(StoredVal[Part], VecPtr, Alignment,
Mask[Part]);
else
- NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+ NewSI =
+ Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
}
addMetadata(NewSI, SI);
}
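The comment about StoredVal above is the subtle point in this hunk: getVectorValue returns a reference into VectorLoopValueMap, and the reverse-store path rewrites StoredVal[Part], so the code takes a copy to keep that local rewrite from leaking into the cached entry other users read. A toy illustration of the aliasing hazard (plain std::map, not LLVM code):

#include <cassert>
#include <map>
#include <vector>

int main() {
  std::map<int, std::vector<int>> Map;
  Map[0] = {1, 2};

  std::vector<int> Copy = Map[0]; // value copy, like StoredVal
  Copy[0] = -1;                   // local reverse/rewrite
  assert(Map[0][0] == 1);         // cached entry untouched

  std::vector<int> &Ref = Map[0]; // reference: the hazard
  Ref[0] = -1;
  assert(Map[0][0] == -1);        // cached entry silently changed
}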
@@ -3057,13 +3044,14 @@ void InnerLoopVectorizer::vectorizeMemor
// Handle loads.
assert(LI && "Must have a load instruction");
setDebugLocFromInst(Builder, LI);
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *NewLI;
+ Instruction *NewLI;
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;
- Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
- NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart,
+ NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart,
nullptr, "wide.masked.gather");
+ Entry[Part] = NewLI;
} else {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
@@ -3085,12 +3073,11 @@ void InnerLoopVectorizer::vectorizeMemor
"wide.masked.load");
else
NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
- if (Reverse)
- NewLI = reverseVector(NewLI);
+ Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
}
addMetadata(NewLI, LI);
- VectorLoopValueMap.setVectorValue(Instr, Part, NewLI);
}
+ VectorLoopValueMap.initVector(Instr, Entry);
}
void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
@@ -3107,6 +3094,9 @@ void InnerLoopVectorizer::scalarizeInstr
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
+ // Initialize a new scalar map entry.
+ ScalarParts Entry(UF);
+
VectorParts Cond;
if (IfPredicateInstr)
Cond = createBlockInMask(Instr->getParent());
@@ -3118,6 +3108,7 @@ void InnerLoopVectorizer::scalarizeInstr
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part].resize(VF);
// For each scalar that we create:
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
@@ -3138,7 +3129,7 @@ void InnerLoopVectorizer::scalarizeInstr
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- auto *NewOp = getOrCreateScalarValue(Instr->getOperand(op), Part, Lane);
+ auto *NewOp = getScalarValue(Instr->getOperand(op), Part, Lane);
Cloned->setOperand(op, NewOp);
}
addNewMetadata(Cloned, Instr);
@@ -3147,7 +3138,7 @@ void InnerLoopVectorizer::scalarizeInstr
Builder.Insert(Cloned);
// Add the cloned scalar to the scalar map entry.
- VectorLoopValueMap.setScalarValue(Instr, Part, Lane, Cloned);
+ Entry[Part][Lane] = Cloned;
// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
@@ -3159,6 +3150,7 @@ void InnerLoopVectorizer::scalarizeInstr
PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
}
}
+ VectorLoopValueMap.initScalar(Instr, Entry);
}
PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
@@ -3794,10 +3786,10 @@ void InnerLoopVectorizer::truncateToMini
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from VectorLoopValueMap indicates that it
// wasn't vectorized.
- if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
+ if (!VectorLoopValueMap.hasVector(KV.first))
continue;
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *I = getOrCreateVectorValue(KV.first, Part);
+ VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
+ for (Value *&I : Parts) {
if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
continue;
Type *OriginalTy = I->getType();
@@ -3886,7 +3878,7 @@ void InnerLoopVectorizer::truncateToMini
I->replaceAllUsesWith(Res);
cast<Instruction>(I)->eraseFromParent();
Erased.insert(I);
- VectorLoopValueMap.resetVectorValue(KV.first, Part, Res);
+ I = Res;
}
}
@@ -3895,15 +3887,15 @@ void InnerLoopVectorizer::truncateToMini
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from VectorLoopValueMap indicates that it
// wasn't vectorized.
- if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
+ if (!VectorLoopValueMap.hasVector(KV.first))
continue;
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *I = getOrCreateVectorValue(KV.first, Part);
+ VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
+ for (Value *&I : Parts) {
ZExtInst *Inst = dyn_cast<ZExtInst>(I);
if (Inst && Inst->use_empty()) {
Value *NewI = Inst->getOperand(0);
Inst->eraseFromParent();
- VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI);
+ I = NewI;
}
}
}
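Both fix-up loops above now iterate the map entry as Value *&I, so assigning to I rebinds the stored pointer in place; this is the role resetVectorValue played in the reverted scheme. A toy demonstration of the reference-based rebinding (plain ints, not LLVM values):

#include <cassert>
#include <vector>

int main() {
  int A = 1, B = 2;
  std::vector<int *> Parts = {&A, &A};
  for (int *&I : Parts) // reference to the stored pointer
    if (*I == 1)
      I = &B;           // rebind the map entry in place
  assert(Parts[0] == &B && Parts[1] == &B);
}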
@@ -4033,8 +4025,8 @@ void InnerLoopVectorizer::fixFirstOrderR
// We constructed a temporary phi node in the first phase of vectorization.
// This phi node will eventually be deleted.
- Builder.SetInsertPoint(
- cast<Instruction>(VectorLoopValueMap.getVectorValue(Phi, 0)));
+ VectorParts &PhiParts = VectorLoopValueMap.getVector(Phi);
+ Builder.SetInsertPoint(cast<Instruction>(PhiParts[0]));
// Create a phi node for the new recurrence. The current value will either be
// the initial value inserted into a vector or loop-varying vector value.
@@ -4042,19 +4034,19 @@ void InnerLoopVectorizer::fixFirstOrderR
VecPhi->addIncoming(VectorInit, LoopVectorPreHeader);
// Get the vectorized previous value.
- Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1);
+ auto &PreviousParts = getVectorValue(Previous);
// Set the insertion point after the previous value if it is an instruction.
// Note that the previous value may have been constant-folded so it is not
// guaranteed to be an instruction in the vector loop. Also, if the previous
// value is a phi node, we should insert after all the phi nodes to avoid
// breaking basic block verification.
- if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart) ||
- isa<PHINode>(PreviousLastPart))
+ if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousParts[UF - 1]) ||
+ isa<PHINode>(PreviousParts[UF - 1]))
Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());
else
Builder.SetInsertPoint(
- &*++BasicBlock::iterator(cast<Instruction>(PreviousLastPart)));
+ &*++BasicBlock::iterator(cast<Instruction>(PreviousParts[UF - 1])));
// We will construct a vector for the recurrence by combining the values for
// the current and previous iterations. This is the required shuffle mask.
@@ -4069,16 +4061,15 @@ void InnerLoopVectorizer::fixFirstOrderR
// Shuffle the current and previous vector and update the vector parts.
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *PreviousPart = getOrCreateVectorValue(Previous, Part);
- Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part);
auto *Shuffle =
- VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart,
- ConstantVector::get(ShuffleMask))
- : Incoming;
- PhiPart->replaceAllUsesWith(Shuffle);
- cast<Instruction>(PhiPart)->eraseFromParent();
- VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle);
- Incoming = PreviousPart;
+ VF > 1
+ ? Builder.CreateShuffleVector(Incoming, PreviousParts[Part],
+ ConstantVector::get(ShuffleMask))
+ : Incoming;
+ PhiParts[Part]->replaceAllUsesWith(Shuffle);
+ cast<Instruction>(PhiParts[Part])->eraseFromParent();
+ PhiParts[Part] = Shuffle;
+ Incoming = PreviousParts[Part];
}
// Fix the latch value of the new recurrence in the vector loop.
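The shuffle mask built here is [VF-1, VF, ..., 2*VF-2]: each combined part takes the last lane of Incoming followed by the first VF-1 lanes of the current part of the previous value. A toy model of that lane selection (plain vectors, not IR; recurShuffle is illustrative):

#include <cassert>
#include <vector>

std::vector<int> recurShuffle(const std::vector<int> &Incoming,
                              const std::vector<int> &PrevPart) {
  std::vector<int> R;
  R.push_back(Incoming.back());              // lane VF-1 of Incoming
  for (size_t i = 0; i + 1 < PrevPart.size(); ++i)
    R.push_back(PrevPart[i]);                // lanes 0..VF-2 of PrevPart
  return R;
}

int main() {
  std::vector<int> Incoming = {0, 1, 2, 3}, PrevPart = {4, 5, 6, 7};
  assert((recurShuffle(Incoming, PrevPart) == std::vector<int>{3, 4, 5, 6}));
}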
@@ -4106,7 +4097,7 @@ void InnerLoopVectorizer::fixFirstOrderR
// `Incoming`. This is analogous to the vectorized case above: extracting the
// second last element when VF > 1.
else if (UF > 1)
- ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(Previous, UF - 2);
+ ExtractForPhiUsedOutsideLoop = PreviousParts[UF - 2];
// Fix the initial value of the original recurrence in the scalar loop.
Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
@@ -4157,7 +4148,8 @@ void InnerLoopVectorizer::fixReduction(P
Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
// This is the vector-clone of the value that leaves the loop.
- Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();
+ const VectorParts &VectorExit = getVectorValue(LoopExitInst);
+ Type *VecTy = VectorExit[0]->getType();
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
@@ -4195,17 +4187,18 @@ void InnerLoopVectorizer::fixReduction(P
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
+ const VectorParts &VecRdxPhi = getVectorValue(Phi);
BasicBlock *Latch = OrigLoop->getLoopLatch();
Value *LoopVal = Phi->getIncomingValueForBlock(Latch);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part);
- Value *Val = getOrCreateVectorValue(LoopVal, Part);
+ const VectorParts &Val = getVectorValue(LoopVal);
+ for (unsigned part = 0; part < UF; ++part) {
// Make sure to add the reduction start value only to the
// first unroll part.
- Value *StartVal = (Part == 0) ? VectorStart : Identity;
- cast<PHINode>(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader);
- cast<PHINode>(VecRdxPhi)
- ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
+ Value *StartVal = (part == 0) ? VectorStart : Identity;
+ cast<PHINode>(VecRdxPhi[part])
+ ->addIncoming(StartVal, LoopVectorPreHeader);
+ cast<PHINode>(VecRdxPhi[part])
+ ->addIncoming(Val[part], LI->getLoopFor(LoopVectorBody)->getLoopLatch());
}
// Before each round, move the insertion point right between
@@ -4214,6 +4207,7 @@ void InnerLoopVectorizer::fixReduction(P
// instructions.
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
+ VectorParts &RdxParts = VectorLoopValueMap.getVector(LoopExitInst);
setDebugLocFromInst(Builder, LoopExitInst);
// If the vector reduction can be performed in a smaller type, we truncate
@@ -4222,42 +4216,37 @@ void InnerLoopVectorizer::fixReduction(P
if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
Builder.SetInsertPoint(LoopVectorBody->getTerminator());
- VectorParts RdxParts(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
- Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
+ for (unsigned part = 0; part < UF; ++part) {
+ Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
- : Builder.CreateZExt(Trunc, VecTy);
- for (Value::user_iterator UI = RdxParts[Part]->user_begin();
- UI != RdxParts[Part]->user_end();)
+ : Builder.CreateZExt(Trunc, VecTy);
+ for (Value::user_iterator UI = RdxParts[part]->user_begin();
+ UI != RdxParts[part]->user_end();)
if (*UI != Trunc) {
- (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd);
- RdxParts[Part] = Extnd;
+ (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd);
+ RdxParts[part] = Extnd;
} else {
++UI;
}
}
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- for (unsigned Part = 0; Part < UF; ++Part) {
- RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
- VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, RdxParts[Part]);
- }
+ for (unsigned part = 0; part < UF; ++part)
+ RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
}
// Reduce all of the unrolled parts into a single vector.
- Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
+ Value *ReducedPartRdx = RdxParts[0];
unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
setDebugLocFromInst(Builder, ReducedPartRdx);
- for (unsigned Part = 1; Part < UF; ++Part) {
- Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
+ for (unsigned part = 1; part < UF; ++part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
// Floating point operations had to be 'fast' to enable the reduction.
ReducedPartRdx = addFastMathFlag(
- Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
+ Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
ReducedPartRdx, "bin.rdx"));
else
ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp(
- Builder, MinMaxKind, ReducedPartRdx, RdxPart);
+ Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);
}
if (VF > 1) {
@@ -4529,16 +4518,14 @@ InnerLoopVectorizer::createEdgeMask(Basi
assert(BI && "Unexpected terminator found");
if (BI->isConditional()) {
+ VectorParts EdgeMask = getVectorValue(BI->getCondition());
- VectorParts EdgeMask(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
- if (BI->getSuccessor(0) != Dst)
- EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
+ if (BI->getSuccessor(0) != Dst)
+ for (unsigned part = 0; part < UF; ++part)
+ EdgeMask[part] = Builder.CreateNot(EdgeMask[part]);
- EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]);
- EdgeMask[Part] = EdgeMaskPart;
- }
+ for (unsigned part = 0; part < UF; ++part)
+ EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
EdgeMaskCache[Edge] = EdgeMask;
return EdgeMask;
@@ -4557,27 +4544,23 @@ InnerLoopVectorizer::createBlockInMask(B
if (BCEntryIt != BlockMaskCache.end())
return BCEntryIt->second;
- VectorParts BlockMask(UF);
-
// Loop incoming mask is all-one.
if (OrigLoop->getHeader() == BB) {
Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = getOrCreateVectorValue(C, Part);
+ const VectorParts &BlockMask = getVectorValue(C);
BlockMaskCache[BB] = BlockMask;
return BlockMask;
}
// This is the block mask. We OR all incoming edges, and with zero.
Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = getOrCreateVectorValue(Zero, Part);
+ VectorParts BlockMask = getVectorValue(Zero);
// For each pred:
- for (pred_iterator It = pred_begin(BB), E = pred_end(BB); It != E; ++It) {
- VectorParts EM = createEdgeMask(*It, BB);
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EM[Part]);
+ for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) {
+ VectorParts EM = createEdgeMask(*it, BB);
+ for (unsigned part = 0; part < UF; ++part)
+ BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
}
BlockMaskCache[BB] = BlockMask;
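For reference, the mask plumbing restored here composes as follows: an edge mask is the branch condition (negated for the false successor) ANDed with the source block's mask, and a block mask ORs all incoming edge masks, seeded with zero. A toy model with plain bools rather than vector masks:

#include <cassert>

bool edgeMask(bool Cond, bool TakesTrueSucc, bool SrcMask) {
  bool E = TakesTrueSucc ? Cond : !Cond; // negate for the false edge
  return E && SrcMask;                   // AND with the source block mask
}

int main() {
  bool Block = false; // starts at zero, like the OR-with-zero seed
  Block = Block || edgeMask(true, true, true);  // one incoming edge
  Block = Block || edgeMask(false, true, true); // another edge
  assert(Block);
}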
@@ -4592,14 +4575,15 @@ void InnerLoopVectorizer::widenPHIInstru
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
- for (unsigned Part = 0; Part < UF; ++Part) {
+ VectorParts Entry(UF);
+ for (unsigned part = 0; part < UF; ++part) {
// This is phase one of vectorizing PHIs.
Type *VecTy =
(VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
- Value *EntryPart = PHINode::Create(
+ Entry[part] = PHINode::Create(
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
- VectorLoopValueMap.setVectorValue(P, Part, EntryPart);
}
+ VectorLoopValueMap.initVector(P, Entry);
return;
}
@@ -4623,22 +4607,21 @@ void InnerLoopVectorizer::widenPHIInstru
for (unsigned In = 0; In < NumIncoming; In++) {
VectorParts Cond =
createEdgeMask(P->getIncomingBlock(In), P->getParent());
+ const VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *In0 = getOrCreateVectorValue(P->getIncomingValue(In), Part);
+ for (unsigned part = 0; part < UF; ++part) {
// We might have single edge PHIs (blocks) - use an identity
// 'select' for the first PHI operand.
if (In == 0)
- Entry[Part] = Builder.CreateSelect(Cond[Part], In0, In0);
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In0[part]);
else
// Select between the current value and the previous incoming edge
// based on the incoming mask.
- Entry[Part] = Builder.CreateSelect(Cond[Part], In0, Entry[Part],
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part], Entry[part],
"predphi");
}
}
- for (unsigned Part = 0; Part < UF; ++Part)
- VectorLoopValueMap.setVectorValue(P, Part, Entry[Part]);
+ VectorLoopValueMap.initVector(P, Entry);
return;
}
@@ -4669,15 +4652,18 @@ void InnerLoopVectorizer::widenPHIInstru
unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF;
// These are the scalar results. Notice that we don't generate vector GEPs
// because scalar GEPs result in better code.
+ ScalarParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part].resize(VF);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF);
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
SclrGep->setName("next.gep");
- VectorLoopValueMap.setScalarValue(P, Part, Lane, SclrGep);
+ Entry[Part][Lane] = SclrGep;
}
}
+ VectorLoopValueMap.initScalar(P, Entry);
return;
}
}
@@ -4727,6 +4713,7 @@ void InnerLoopVectorizer::vectorizeInstr
// is vector-typed. Thus, to keep the representation compact, we only use
// vector-typed operands for loop-varying values.
auto *GEP = cast<GetElementPtrInst>(&I);
+ VectorParts Entry(UF);
if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
// If we are vectorizing, but the GEP has only loop-invariant operands,
@@ -4742,11 +4729,8 @@ void InnerLoopVectorizer::vectorizeInstr
// collectLoopScalars() and teach getVectorValue() to broadcast
// the lane-zero scalar value.
auto *Clone = Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
- VectorLoopValueMap.setVectorValue(&I, Part, EntryPart);
- addMetadata(EntryPart, GEP);
- }
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = Builder.CreateVectorSplat(VF, Clone);
} else {
// If the GEP has at least one loop-varying operand, we are sure to
// produce a vector of pointers. But if we are only unrolling, we want
@@ -4759,10 +4743,9 @@ void InnerLoopVectorizer::vectorizeInstr
// The pointer operand of the new GEP. If it's loop-invariant, we
// won't broadcast it.
- auto *Ptr =
- OrigLoop->isLoopInvariant(GEP->getPointerOperand())
- ? GEP->getPointerOperand()
- : getOrCreateVectorValue(GEP->getPointerOperand(), Part);
+ auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand())
+ ? GEP->getPointerOperand()
+ : getVectorValue(GEP->getPointerOperand())[Part];
// Collect all the indices for the new GEP. If any index is
// loop-invariant, we won't broadcast it.
@@ -4771,7 +4754,7 @@ void InnerLoopVectorizer::vectorizeInstr
if (OrigLoop->isLoopInvariant(U.get()))
Indices.push_back(U.get());
else
- Indices.push_back(getOrCreateVectorValue(U.get(), Part));
+ Indices.push_back(getVectorValue(U.get())[Part]);
}
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
@@ -4781,11 +4764,12 @@ void InnerLoopVectorizer::vectorizeInstr
: Builder.CreateGEP(Ptr, Indices);
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
- VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
- addMetadata(NewGEP, GEP);
+ Entry[Part] = NewGEP;
}
}
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, GEP);
break;
}
case Instruction::UDiv:
@@ -4816,20 +4800,22 @@ void InnerLoopVectorizer::vectorizeInstr
// Just widen binops.
auto *BinOp = cast<BinaryOperator>(&I);
setDebugLocFromInst(Builder, BinOp);
+ const VectorParts &A = getVectorValue(BinOp->getOperand(0));
+ const VectorParts &B = getVectorValue(BinOp->getOperand(1));
+ // Use this vector value for all users of the original instruction.
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
- Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
VecOp->copyIRFlags(BinOp);
- // Use this vector value for all users of the original instruction.
- VectorLoopValueMap.setVectorValue(&I, Part, V);
- addMetadata(V, BinOp);
+ Entry[Part] = V;
}
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, BinOp);
break;
}
case Instruction::Select: {
@@ -4845,19 +4831,20 @@ void InnerLoopVectorizer::vectorizeInstr
// loop. This means that we can't just use the original 'cond' value.
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
+ const VectorParts &Cond = getVectorValue(I.getOperand(0));
+ const VectorParts &Op0 = getVectorValue(I.getOperand(1));
+ const VectorParts &Op1 = getVectorValue(I.getOperand(2));
- auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), 0, 0);
+ auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0);
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
- Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
- Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
- Value *Sel =
- Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
- VectorLoopValueMap.setVectorValue(&I, Part, Sel);
- addMetadata(Sel, &I);
+ Entry[Part] = Builder.CreateSelect(
+ InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);
}
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, &I);
break;
}
@@ -4867,20 +4854,22 @@ void InnerLoopVectorizer::vectorizeInstr
bool FCmp = (I.getOpcode() == Instruction::FCmp);
auto *Cmp = dyn_cast<CmpInst>(&I);
setDebugLocFromInst(Builder, Cmp);
+ const VectorParts &A = getVectorValue(Cmp->getOperand(0));
+ const VectorParts &B = getVectorValue(Cmp->getOperand(1));
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part);
- Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part);
Value *C = nullptr;
if (FCmp) {
- C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
cast<FCmpInst>(C)->copyFastMathFlags(Cmp);
} else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
}
- VectorLoopValueMap.setVectorValue(&I, Part, C);
- addMetadata(C, &I);
+ Entry[Part] = C;
}
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, &I);
break;
}
@@ -4917,12 +4906,12 @@ void InnerLoopVectorizer::vectorizeInstr
Type *DestTy =
(VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = getOrCreateVectorValue(CI->getOperand(0), Part);
- Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- VectorLoopValueMap.setVectorValue(&I, Part, Cast);
- addMetadata(Cast, &I);
- }
+ const VectorParts &A = getVectorValue(CI->getOperand(0));
+ VectorParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, &I);
break;
}
@@ -4960,14 +4949,17 @@ void InnerLoopVectorizer::vectorizeInstr
break;
}
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
Value *Arg = CI->getArgOperand(i);
// Some intrinsics have a scalar argument - don't replace it with a
// vector.
- if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i))
- Arg = getOrCreateVectorValue(CI->getArgOperand(i), Part);
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
+ const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
+ Arg = VectorArg[Part];
+ }
Args.push_back(Arg);
}
@@ -5000,10 +4992,11 @@ void InnerLoopVectorizer::vectorizeInstr
if (isa<FPMathOperator>(V))
V->copyFastMathFlags(CI);
- VectorLoopValueMap.setVectorValue(&I, Part, V);
- addMetadata(V, &I);
+ Entry[Part] = V;
}
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, &I);
break;
}