<div dir="ltr">This change caused a selfhost failure:<div><br></div><div><a href="http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules-2/builds/8963/steps/compile.llvm.stage2/logs/stdio">http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules-2/builds/8963/steps/compile.llvm.stage2/logs/stdio</a><br></div><div><br></div><div>Please can you fix or revert?</div><div class="gmail_extra"><br><div class="gmail_quote">On 26 June 2017 at 14:03, Ayal Zaks via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: ayalz<br>
Date: Mon Jun 26 14:03:51 2017<br>
New Revision: 306331<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=306331&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=306331&view=rev</a><br>
Log:<br>
[LV] Changing the interface of ValueMap, NFC.<br>
<br>
Instead of providing access to the internal MapStorage holding all Values<br>
associated with a given Key, used for setting or resetting them all together,<br>
ValueMap keeps its MapStorage internal; its new interface allows getting,<br>
setting or resetting a single Value, per part or per part-and-lane.<br>
Follows the discussion in <a href="https://reviews.llvm.org/D32871" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D32871</a>.<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D34473" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D34473</a><br>
<br>
Modified:<br>
llvm/trunk/lib/Transforms/<wbr>Vectorize/LoopVectorize.cpp<br>
<br>
Modified: llvm/trunk/lib/Transforms/<wbr>Vectorize/LoopVectorize.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=306331&r1=306330&r2=306331&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Transforms/Vectorize/<wbr>LoopVectorize.cpp?rev=306331&<wbr>r1=306330&r2=306331&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Transforms/<wbr>Vectorize/LoopVectorize.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/<wbr>Vectorize/LoopVectorize.cpp Mon Jun 26 14:03:51 2017<br>
@@ -532,21 +532,34 @@ protected:<br>
/// Returns true if we should generate a scalar version of \p IV.<br>
bool needsScalarInduction(<wbr>Instruction *IV) const;<br>
<br>
- /// Return a constant reference to the VectorParts corresponding to \p V from<br>
- /// the original loop. If the value has already been vectorized, the<br>
- /// corresponding vector entry in VectorLoopValueMap is returned. If,<br>
+ /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a<br>
+ /// vector or scalar value on-demand if one is not yet available. When<br>
+ /// vectorizing a loop, we visit the definition of an instruction before its<br>
+ /// uses. When visiting the definition, we either vectorize or scalarize the<br>
+ /// instruction, creating an entry for it in the corresponding map. (In some<br>
+ /// cases, such as induction variables, we will create both vector and scalar<br>
+ /// entries.) Then, as we encounter uses of the definition, we derive values<br>
+ /// for each scalar or vector use unless such a value is already available.<br>
+ /// For example, if we scalarize a definition and one of its uses is vector,<br>
+ /// we build the required vector on-demand with an insertelement sequence<br>
+ /// when visiting the use. Otherwise, if the use is scalar, we can use the<br>
+ /// existing scalar definition.<br>
+ ///<br>
+ /// Return a value in the new loop corresponding to \p V from the original<br>
+ /// loop at unroll index \p Part. If the value has already been vectorized,<br>
+ /// the corresponding vector entry in VectorLoopValueMap is returned. If,<br>
/// however, the value has a scalar entry in VectorLoopValueMap, we construct<br>
- /// new vector values on-demand by inserting the scalar values into vectors<br>
+ /// a new vector value on-demand by inserting the scalar values into a vector<br>
/// with an insertelement sequence. If the value has been neither vectorized<br>
/// nor scalarized, it must be loop invariant, so we simply broadcast the<br>
- /// value into vectors.<br>
- const VectorParts &getVectorValue(Value *V);<br>
+ /// value into a vector.<br>
+ Value *getOrCreateVectorValue(Value *V, unsigned Part);<br>
<br>
/// Return a value in the new loop corresponding to \p V from the original<br>
/// loop at unroll index \p Part and vector index \p Lane. If the value has<br>
/// been vectorized but not scalarized, the necessary extractelement<br>
/// instruction will be generated.<br>
- Value *getScalarValue(Value *V, unsigned Part, unsigned Lane);<br>
+ Value *getOrCreateScalarValue(Value *V, unsigned Part, unsigned Lane);<br>
<br>
/// Try to vectorize the interleaved access group that \p Instr belongs to.<br>
void vectorizeInterleaveGroup(<wbr>Instruction *Instr);<br>
@@ -601,90 +614,103 @@ protected:<br>
/// UF x VF scalar values in the new loop. UF and VF are the unroll and<br>
/// vectorization factors, respectively.<br>
///<br>
- /// Entries can be added to either map with initVector and initScalar, which<br>
- /// initialize and return a constant reference to the new entry. If a<br>
- /// non-constant reference to a vector entry is required, getVector can be<br>
- /// used to retrieve a mutable entry. We currently directly modify the mapped<br>
- /// values during "fix-up" operations that occur once the first phase of<br>
- /// widening is complete. These operations include type truncation and the<br>
- /// second phase of recurrence widening.<br>
+ /// Entries can be added to either map with setVectorValue and setScalarValue,<br>
+ /// which assert that an entry was not already added before. If an entry is to<br>
+ /// replace an existing one, call resetVectorValue. This is currently needed<br>
+ /// to modify the mapped values during "fix-up" operations that occur once the<br>
+ /// first phase of widening is complete. These operations include type<br>
+ /// truncation and the second phase of recurrence widening.<br>
///<br>
- /// Otherwise, entries from either map should be accessed using the<br>
- /// getVectorValue or getScalarValue functions from InnerLoopVectorizer.<br>
- /// getVectorValue and getScalarValue coordinate to generate a vector or<br>
- /// scalar value on-demand if one is not yet available. When vectorizing a<br>
- /// loop, we visit the definition of an instruction before its uses. When<br>
- /// visiting the definition, we either vectorize or scalarize the<br>
- /// instruction, creating an entry for it in the corresponding map. (In some<br>
- /// cases, such as induction variables, we will create both vector and scalar<br>
- /// entries.) Then, as we encounter uses of the definition, we derive values<br>
- /// for each scalar or vector use unless such a value is already available.<br>
- /// For example, if we scalarize a definition and one of its uses is vector,<br>
- /// we build the required vector on-demand with an insertelement sequence<br>
- /// when visiting the use. Otherwise, if the use is scalar, we can use the<br>
- /// existing scalar definition.<br>
+ /// Entries from either map can be retrieved using the getVectorValue and<br>
+ /// getScalarValue functions, which assert that the desired value exists.<br>
+<br>
struct ValueMap {<br>
<br>
/// Construct an empty map with the given unroll and vectorization factors.<br>
- ValueMap(unsigned UnrollFactor, unsigned VecWidth)<br>
- : UF(UnrollFactor), VF(VecWidth) {<br>
- // The unroll and vectorization factors are only used in asserts builds<br>
- // to verify map entries are sized appropriately.<br>
- (void)UF;<br>
- (void)VF;<br>
- }<br>
-<br>
- /// \return True if the map has a vector entry for \p Key.<br>
- bool hasVector(Value *Key) const { return VectorMapStorage.count(Key); }<br>
-<br>
- /// \return True if the map has a scalar entry for \p Key.<br>
- bool hasScalar(Value *Key) const { return ScalarMapStorage.count(Key); }<br>
-<br>
- /// \brief Map \p Key to the given VectorParts \p Entry, and return a<br>
- /// constant reference to the new vector map entry. The given key should<br>
- /// not already be in the map, and the given VectorParts should be<br>
- /// correctly sized for the current unroll factor.<br>
- const VectorParts &initVector(Value *Key, const VectorParts &Entry) {<br>
- assert(!hasVector(Key) && "Vector entry already initialized");<br>
- assert(Entry.size() == UF && "VectorParts has wrong dimensions");<br>
- VectorMapStorage[Key] = Entry;<br>
- return VectorMapStorage[Key];<br>
- }<br>
-<br>
- /// \brief Map \p Key to the given ScalarParts \p Entry, and return a<br>
- /// constant reference to the new scalar map entry. The given key should<br>
- /// not already be in the map, and the given ScalarParts should be<br>
- /// correctly sized for the current unroll and vectorization factors.<br>
- const ScalarParts &initScalar(Value *Key, const ScalarParts &Entry) {<br>
- assert(!hasScalar(Key) && "Scalar entry already initialized");<br>
- assert(Entry.size() == UF &&<br>
- all_of(make_range(Entry.begin(<wbr>), Entry.end()),<br>
- [&](const SmallVectorImpl<Value *> &Values) -> bool {<br>
- return Values.size() == VF;<br>
- }) &&<br>
- "ScalarParts has wrong dimensions");<br>
- ScalarMapStorage[Key] = Entry;<br>
- return ScalarMapStorage[Key];<br>
- }<br>
-<br>
- /// \return A reference to the vector map entry corresponding to \p Key.<br>
- /// The key should already be in the map. This function should only be used<br>
- /// when it's necessary to update values that have already been vectorized.<br>
- /// This is the case for "fix-up" operations including type truncation and<br>
- /// the second phase of recurrence vectorization. If a non-const reference<br>
- /// isn't required, getVectorValue should be used instead.<br>
- VectorParts &getVector(Value *Key) {<br>
- assert(hasVector(Key) && "Vector entry not initialized");<br>
- return VectorMapStorage.find(Key)-><wbr>second;<br>
- }<br>
-<br>
- /// Retrieve an entry from the vector or scalar maps. The preferred way to<br>
- /// access an existing mapped entry is with getVectorValue or<br>
- /// getScalarValue from InnerLoopVectorizer. Until those functions can be<br>
- /// moved inside ValueMap, we have to declare them as friends.<br>
- friend const VectorParts &InnerLoopVectorizer::<wbr>getVectorValue(Value *V);<br>
- friend Value *InnerLoopVectorizer::<wbr>getScalarValue(Value *V, unsigned Part,<br>
- unsigned Lane);<br>
+ ValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}<br>
+<br>
+ /// \return True if the map has any vector entry for \p Key.<br>
+ bool hasAnyVectorValue(Value *Key) const {<br>
+ return VectorMapStorage.count(Key);<br>
+ }<br>
+<br>
+ /// \return True if the map has a vector entry for \p Key and \p Part.<br>
+ bool hasVectorValue(Value *Key, unsigned Part) const {<br>
+ assert(Part < UF && "Queried Vector Part is too large.");<br>
+ if (!hasAnyVectorValue(Key))<br>
+ return false;<br>
+ const VectorParts &Entry = VectorMapStorage.find(Key)-><wbr>second;<br>
+ assert(Entry.size() == UF && "VectorParts has wrong dimensions.");<br>
+ return Entry[Part] != nullptr;<br>
+ }<br>
+<br>
+ /// \return True if the map has any scalar entry for \p Key.<br>
+ bool hasAnyScalarValue(Value *Key) const {<br>
+ return ScalarMapStorage.count(Key);<br>
+ }<br>
+<br>
+ /// \return True if the map has a scalar entry for \p Key, \p Part and<br>
+ /// \p Lane.<br>
+ bool hasScalarValue(Value *Key, unsigned Part, unsigned Lane) const {<br>
+ assert(Part < UF && "Queried Scalar Part is too large.");<br>
+ assert(Lane < VF && "Queried Scalar Lane is too large.");<br>
+ if (!hasAnyScalarValue(Key))<br>
+ return false;<br>
+ const ScalarParts &Entry = ScalarMapStorage.find(Key)-><wbr>second;<br>
+ assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");<br>
+ assert(Entry[Part].size() == VF && "ScalarParts has wrong dimensions.");<br>
+ return Entry[Part][Lane] != nullptr;<br>
+ }<br>
+<br>
+ /// Retrieve the existing vector value that corresponds to \p Key and<br>
+ /// \p Part.<br>
+ Value *getVectorValue(Value *Key, unsigned Part) {<br>
+ assert(hasVectorValue(Key, Part) && "Getting non-existent value.");<br>
+ return VectorMapStorage[Key][Part];<br>
+ }<br>
+<br>
+ /// Retrieve the existing scalar value that corresponds to \p Key, \p Part<br>
+ /// and \p Lane.<br>
+ Value *getScalarValue(Value *Key, unsigned Part, unsigned Lane) {<br>
+ assert(hasScalarValue(Key, Part, Lane) && "Getting non-existent value.");<br>
+ return ScalarMapStorage[Key][Part][<wbr>Lane];<br>
+ }<br>
+<br>
+ /// Set a vector value associated with \p Key and \p Part. Assumes such a<br>
+ /// value is not already set. If it is, use resetVectorValue() instead.<br>
+ void setVectorValue(Value *Key, unsigned Part, Value *Vector) {<br>
+ assert(!hasVectorValue(Key, Part) && "Vector value already set for part");<br>
+ if (!VectorMapStorage.count(Key)) {<br>
+ VectorParts Entry(UF);<br>
+ VectorMapStorage[Key] = Entry;<br>
+ }<br>
+ VectorMapStorage[Key][Part] = Vector;<br>
+ }<br>
+<br>
+ /// Set a scalar value associated with \p Key for \p Part and \p Lane.<br>
+ /// Assumes such a value is not already set.<br>
+ void setScalarValue(Value *Key, unsigned Part, unsigned Lane,<br>
+ Value *Scalar) {<br>
+ assert(!hasScalarValue(Key, Part, Lane) && "Scalar value already set");<br>
+ if (!ScalarMapStorage.count(Key)) {<br>
+ ScalarParts Entry(UF);<br>
+ for (unsigned Part = 0; Part < UF; ++Part)<br>
+ Entry[Part].resize(VF, nullptr);<br>
+ // TODO: Consider storing uniform values only per-part, as they occupy<br>
+ // lane 0 only, keeping the other VF-1 redundant entries null.<br>
+ ScalarMapStorage[Key] = Entry;<br>
+ }<br>
+ ScalarMapStorage[Key][Part][<wbr>Lane] = Scalar;<br>
+ }<br>
+<br>
+ /// Reset the vector value associated with \p Key for the given \p Part.<br>
+ /// This function can be used to update values that have already been<br>
+ /// vectorized. This is the case for "fix-up" operations including type<br>
+ /// truncation and the second phase of recurrence vectorization.<br>
+ void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {<br>
+ assert(hasVectorValue(Key, Part) && "Vector value not set for part");<br>
+ VectorMapStorage[Key][Part] = Vector;<br>
+ }<br>
<br>
private:<br>
/// The unroll factor. Each entry in the vector map contains UF vector<br>
@@ -2417,15 +2443,13 @@ void InnerLoopVectorizer::<wbr>createVectorIn<br>
PHINode *VecInd = PHINode::Create(SteppedStart-><wbr>getType(), 2, "vec.ind",<br>
&*LoopVectorBody-><wbr>getFirstInsertionPt());<br>
Instruction *LastInduction = VecInd;<br>
- VectorParts Entry(UF);<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
- Entry[Part] = LastInduction;<br>
+ VectorLoopValueMap.<wbr>setVectorValue(EntryVal, Part, LastInduction);<br>
+ if (isa<TruncInst>(EntryVal))<br>
+ addMetadata(LastInduction, EntryVal);<br>
LastInduction = cast<Instruction>(<wbr>addFastMathFlag(<br>
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));<br>
}<br>
- VectorLoopValueMap.initVector(<wbr>EntryVal, Entry);<br>
- if (isa<TruncInst>(EntryVal))<br>
- addMetadata(Entry, EntryVal);<br>
<br>
// Move the last step to the end of the latch block. This ensures consistent<br>
// placement of all induction updates.<br>
@@ -2531,13 +2555,13 @@ void InnerLoopVectorizer::<wbr>widenIntOrFpIn<br>
// induction variable, and build the necessary step vectors.<br>
if (!VectorizedIV) {<br>
Value *Broadcasted = getBroadcastInstrs(ScalarIV);<br>
- VectorParts Entry(UF);<br>
- for (unsigned Part = 0; Part < UF; ++Part)<br>
- Entry[Part] =<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *EntryPart =<br>
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());<br>
- VectorLoopValueMap.initVector(<wbr>EntryVal, Entry);<br>
- if (Trunc)<br>
- addMetadata(Entry, Trunc);<br>
+ VectorLoopValueMap.<wbr>setVectorValue(EntryVal, Part, EntryPart);<br>
+ if (Trunc)<br>
+ addMetadata(EntryPart, Trunc);<br>
+ }<br>
}<br>
<br>
// If an induction variable is only used for counting loop iterations or<br>
@@ -2637,17 +2661,14 @@ void InnerLoopVectorizer::<wbr>buildScalarSte<br>
Cost-><wbr>isUniformAfterVectorization(<wbr>cast<Instruction>(EntryVal), VF) ? 1 : VF;<br>
<br>
// Compute the scalar steps and save the results in VectorLoopValueMap.<br>
- ScalarParts Entry(UF);<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
- Entry[Part].resize(VF);<br>
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {<br>
auto *StartIdx = getSignedIntOrFpConstant(<wbr>ScalarIVTy, VF * Part + Lane);<br>
auto *Mul = addFastMathFlag(Builder.<wbr>CreateBinOp(MulOp, StartIdx, Step));<br>
auto *Add = addFastMathFlag(Builder.<wbr>CreateBinOp(AddOp, ScalarIV, Mul));<br>
- Entry[Part][Lane] = Add;<br>
+ VectorLoopValueMap.<wbr>setScalarValue(EntryVal, Part, Lane, Add);<br>
}<br>
}<br>
- VectorLoopValueMap.initScalar(<wbr>EntryVal, Entry);<br>
}<br>
<br>
int LoopVectorizationLegality::<wbr>isConsecutivePtr(Value *Ptr) {<br>
@@ -2665,8 +2686,7 @@ bool LoopVectorizationLegality::<wbr>isUnifor<br>
return LAI->isUniform(V);<br>
}<br>
<br>
-const InnerLoopVectorizer::<wbr>VectorParts &<br>
-InnerLoopVectorizer::<wbr>getVectorValue(Value *V) {<br>
+Value *InnerLoopVectorizer::<wbr>getOrCreateVectorValue(Value *V, unsigned Part) {<br>
assert(V != Induction && "The new induction variable should not be used.");<br>
assert(!V->getType()-><wbr>isVectorTy() && "Can't widen a vector");<br>
assert(!V->getType()-><wbr>isVoidTy() && "Type does not produce a value");<br>
@@ -2675,17 +2695,16 @@ InnerLoopVectorizer::<wbr>getVectorValue(Valu<br>
if (Legal->hasStride(V))<br>
V = ConstantInt::get(V->getType(), 1);<br>
<br>
- // If we have this scalar in the map, return it.<br>
- if (VectorLoopValueMap.hasVector(<wbr>V))<br>
- return VectorLoopValueMap.<wbr>VectorMapStorage[V];<br>
+ // If we have a vector mapped to this value, return it.<br>
+ if (VectorLoopValueMap.<wbr>hasVectorValue(V, Part))<br>
+ return VectorLoopValueMap.<wbr>getVectorValue(V, Part);<br>
<br>
// If the value has not been vectorized, check if it has been scalarized<br>
// instead. If it has been scalarized, and we actually need the value in<br>
// vector form, we will construct the vector values on demand.<br>
- if (VectorLoopValueMap.hasScalar(<wbr>V)) {<br>
+ if (VectorLoopValueMap.<wbr>hasAnyScalarValue(V)) {<br>
<br>
- // Initialize a new vector map entry.<br>
- VectorParts Entry(UF);<br>
+ Value *ScalarValue = VectorLoopValueMap.<wbr>getScalarValue(V, Part, 0);<br>
<br>
// If we've scalarized a value, that value should be an instruction.<br>
auto *I = cast<Instruction>(V);<br>
@@ -2693,9 +2712,8 @@ InnerLoopVectorizer::<wbr>getVectorValue(Valu<br>
// If we aren't vectorizing, we can just copy the scalar map values over to<br>
// the vector map.<br>
if (VF == 1) {<br>
- for (unsigned Part = 0; Part < UF; ++Part)<br>
- Entry[Part] = getScalarValue(V, Part, 0);<br>
- return VectorLoopValueMap.initVector(<wbr>V, Entry);<br>
+ VectorLoopValueMap.<wbr>setVectorValue(V, Part, ScalarValue);<br>
+ return ScalarValue;<br>
}<br>
<br>
// Get the last scalar instruction we generated for V. If the value is<br>
@@ -2703,7 +2721,8 @@ InnerLoopVectorizer::<wbr>getVectorValue(Valu<br>
// of the last unroll iteration. Otherwise, the last instruction is the one<br>
// we created for the last vector lane of the last unroll iteration.<br>
unsigned LastLane = Cost-><wbr>isUniformAfterVectorization(I, VF) ? 0 : VF - 1;<br>
- auto *LastInst = cast<Instruction>(<wbr>getScalarValue(V, UF - 1, LastLane));<br>
+ auto *LastInst =<br>
+ cast<Instruction>(<wbr>getOrCreateScalarValue(V, UF - 1, LastLane));<br>
<br>
// Set the insert point after the last scalarized instruction. This ensures<br>
// the insertelement sequence will directly follow the scalar definitions.<br>
@@ -2717,52 +2736,50 @@ InnerLoopVectorizer::<wbr>getVectorValue(Valu<br>
// iteration. Otherwise, we construct the vector values using insertelement<br>
// instructions. Since the resulting vectors are stored in<br>
// VectorLoopValueMap, we will only generate the insertelements once.<br>
- for (unsigned Part = 0; Part < UF; ++Part) {<br>
- Value *VectorValue = nullptr;<br>
- if (Cost-><wbr>isUniformAfterVectorization(I, VF)) {<br>
- VectorValue = getBroadcastInstrs(<wbr>getScalarValue(V, Part, 0));<br>
- } else {<br>
- VectorValue = UndefValue::get(VectorType::<wbr>get(V->getType(), VF));<br>
- for (unsigned Lane = 0; Lane < VF; ++Lane)<br>
- VectorValue = Builder.CreateInsertElement(<br>
- VectorValue, getScalarValue(V, Part, Lane),<br>
- Builder.getInt32(Lane));<br>
- }<br>
- Entry[Part] = VectorValue;<br>
+ Value *VectorValue = nullptr;<br>
+ if (Cost-><wbr>isUniformAfterVectorization(I, VF)) {<br>
+ VectorValue = getBroadcastInstrs(<wbr>ScalarValue);<br>
+ } else {<br>
+ VectorValue = UndefValue::get(VectorType::<wbr>get(V->getType(), VF));<br>
+ for (unsigned Lane = 0; Lane < VF; ++Lane)<br>
+ VectorValue = Builder.CreateInsertElement(<br>
+ VectorValue, getOrCreateScalarValue(V, Part, Lane),<br>
+ Builder.getInt32(Lane));<br>
}<br>
+ VectorLoopValueMap.<wbr>setVectorValue(V, Part, VectorValue);<br>
Builder.restoreIP(OldIP);<br>
- return VectorLoopValueMap.initVector(<wbr>V, Entry);<br>
+ return VectorValue;<br>
}<br>
<br>
// If this scalar is unknown, assume that it is a constant or that it is<br>
// loop invariant. Broadcast V and save the value for future uses.<br>
Value *B = getBroadcastInstrs(V);<br>
- return VectorLoopValueMap.initVector(<wbr>V, VectorParts(UF, B));<br>
+ VectorLoopValueMap.<wbr>setVectorValue(V, Part, B);<br>
+ return B;<br>
}<br>
<br>
-Value *InnerLoopVectorizer::<wbr>getScalarValue(Value *V, unsigned Part,<br>
- unsigned Lane) {<br>
+Value *InnerLoopVectorizer::<wbr>getOrCreateScalarValue(Value *V, unsigned Part,<br>
+ unsigned Lane) {<br>
<br>
// If the value is not an instruction contained in the loop, it should<br>
// already be scalar.<br>
if (OrigLoop->isLoopInvariant(V))<br>
return V;<br>
<br>
- assert(Lane > 0 ?<br>
- !Cost-><wbr>isUniformAfterVectorization(<wbr>cast<Instruction>(V), VF)<br>
- : true && "Uniform values only have lane zero");<br>
+ assert(Lane > 0 ? !Cost-><wbr>isUniformAfterVectorization(<wbr>cast<Instruction>(V), VF)<br>
+ : true && "Uniform values only have lane zero");<br>
<br>
// If the value from the original loop has not been vectorized, it is<br>
// represented by UF x VF scalar values in the new loop. Return the requested<br>
// scalar value.<br>
- if (VectorLoopValueMap.hasScalar(<wbr>V))<br>
- return VectorLoopValueMap.<wbr>ScalarMapStorage[V][Part][<wbr>Lane];<br>
+ if (VectorLoopValueMap.<wbr>hasScalarValue(V, Part, Lane))<br>
+ return VectorLoopValueMap.<wbr>getScalarValue(V, Part, Lane);<br>
<br>
// If the value has not been scalarized, get its entry in VectorLoopValueMap<br>
// for the given unroll part. If this entry is not a vector type (i.e., the<br>
// vectorization factor is one), there is no need to generate an<br>
// extractelement instruction.<br>
- auto *U = getVectorValue(V)[Part];<br>
+ auto *U = getOrCreateVectorValue(V, Part);<br>
if (!U->getType()->isVectorTy()) {<br>
assert(VF == 1 && "Value not scalarized has non-vector type");<br>
return U;<br>
@@ -2844,7 +2861,7 @@ void InnerLoopVectorizer::<wbr>vectorizeInter<br>
Index += (VF - 1) * Group->getFactor();<br>
<br>
for (unsigned Part = 0; Part < UF; Part++) {<br>
- Value *NewPtr = getScalarValue(Ptr, Part, 0);<br>
+ Value *NewPtr = getOrCreateScalarValue(Ptr, Part, 0);<br>
<br>
// Notice current instruction could be any index. Need to adjust the address<br>
// to the member of index 0.<br>
@@ -2887,7 +2904,6 @@ void InnerLoopVectorizer::<wbr>vectorizeInter<br>
if (!Member)<br>
continue;<br>
<br>
- VectorParts Entry(UF);<br>
Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF);<br>
for (unsigned Part = 0; Part < UF; Part++) {<br>
Value *StridedVec = Builder.CreateShuffleVector(<br>
@@ -2899,10 +2915,11 @@ void InnerLoopVectorizer::<wbr>vectorizeInter<br>
StridedVec = Builder.<wbr>CreateBitOrPointerCast(<wbr>StridedVec, OtherVTy);<br>
}<br>
<br>
- Entry[Part] =<br>
- Group->isReverse() ? reverseVector(StridedVec) : StridedVec;<br>
+ if (Group->isReverse())<br>
+ StridedVec = reverseVector(StridedVec);<br>
+<br>
+ VectorLoopValueMap.<wbr>setVectorValue(Member, Part, StridedVec);<br>
}<br>
- VectorLoopValueMap.initVector(<wbr>Member, Entry);<br>
}<br>
return;<br>
}<br>
@@ -2919,8 +2936,8 @@ void InnerLoopVectorizer::<wbr>vectorizeInter<br>
Instruction *Member = Group->getMember(i);<br>
assert(Member && "Fail to get a member from an interleaved store group");<br>
<br>
- Value *StoredVec =<br>
- getVectorValue(cast<StoreInst><wbr>(Member)->getValueOperand())[<wbr>Part];<br>
+ Value *StoredVec = getOrCreateVectorValue(<br>
+ cast<StoreInst>(Member)-><wbr>getValueOperand(), Part);<br>
if (Group->isReverse())<br>
StoredVec = reverseVector(StoredVec);<br>
<br>
@@ -2981,16 +2998,14 @@ void InnerLoopVectorizer::<wbr>vectorizeMemor<br>
bool CreateGatherScatter =<br>
(Decision == LoopVectorizationCostModel::<wbr>CM_GatherScatter);<br>
<br>
- VectorParts VectorGep;<br>
+ // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector<br>
+ // gather/scatter. Otherwise Decision should have been to Scalarize.<br>
+ assert((ConsecutiveStride || CreateGatherScatter) &&<br>
+ "The instruction should be scalarized");<br>
<br>
// Handle consecutive loads/stores.<br>
- if (ConsecutiveStride) {<br>
- Ptr = getScalarValue(Ptr, 0, 0);<br>
- } else {<br>
- // At this point we should vector version of GEP for Gather or Scatter<br>
- assert(CreateGatherScatter && "The instruction should be scalarized");<br>
- VectorGep = getVectorValue(Ptr);<br>
- }<br>
+ if (ConsecutiveStride)<br>
+ Ptr = getOrCreateScalarValue(Ptr, 0, 0);<br>
<br>
VectorParts Mask = createBlockInMask(Instr-><wbr>getParent());<br>
// Handle Stores:<br>
@@ -2998,16 +3013,15 @@ void InnerLoopVectorizer::<wbr>vectorizeMemor<br>
assert(!Legal->isUniform(SI-><wbr>getPointerOperand()) &&<br>
"We do not allow storing to uniform addresses");<br>
setDebugLocFromInst(Builder, SI);<br>
- // We don't want to update the value in the map as it might be used in<br>
- // another expression. So don't use a reference type for "StoredVal".<br>
- VectorParts StoredVal = getVectorValue(SI-><wbr>getValueOperand());<br>
<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
Instruction *NewSI = nullptr;<br>
+ Value *StoredVal = getOrCreateVectorValue(SI-><wbr>getValueOperand(), Part);<br>
if (CreateGatherScatter) {<br>
Value *MaskPart = Legal->isMaskRequired(SI) ? Mask[Part] : nullptr;<br>
- NewSI = Builder.CreateMaskedScatter(<wbr>StoredVal[Part], VectorGep[Part],<br>
- Alignment, MaskPart);<br>
+ Value *VectorGep = getOrCreateVectorValue(Ptr, Part);<br>
+ NewSI = Builder.CreateMaskedScatter(<wbr>StoredVal, VectorGep, Alignment,<br>
+ MaskPart);<br>
} else {<br>
// Calculate the pointer for the specific unroll-part.<br>
Value *PartPtr =<br>
@@ -3016,7 +3030,7 @@ void InnerLoopVectorizer::<wbr>vectorizeMemor<br>
if (Reverse) {<br>
// If we store to reverse consecutive memory locations, then we need<br>
// to reverse the order of elements in the stored value.<br>
- StoredVal[Part] = reverseVector(StoredVal[Part])<wbr>;<br>
+ StoredVal = reverseVector(StoredVal);<br>
// If the address is consecutive but reversed, then the<br>
// wide store needs to start at the last vector element.<br>
PartPtr =<br>
@@ -3030,11 +3044,10 @@ void InnerLoopVectorizer::<wbr>vectorizeMemor<br>
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(<wbr>AddressSpace));<br>
<br>
if (Legal->isMaskRequired(SI))<br>
- NewSI = Builder.CreateMaskedStore(<wbr>StoredVal[Part], VecPtr, Alignment,<br>
+ NewSI = Builder.CreateMaskedStore(<wbr>StoredVal, VecPtr, Alignment,<br>
Mask[Part]);<br>
else<br>
- NewSI =<br>
- Builder.CreateAlignedStore(<wbr>StoredVal[Part], VecPtr, Alignment);<br>
+ NewSI = Builder.CreateAlignedStore(<wbr>StoredVal, VecPtr, Alignment);<br>
}<br>
addMetadata(NewSI, SI);<br>
}<br>
@@ -3044,14 +3057,13 @@ void InnerLoopVectorizer::<wbr>vectorizeMemor<br>
// Handle loads.<br>
assert(LI && "Must have a load instruction");<br>
setDebugLocFromInst(Builder, LI);<br>
- VectorParts Entry(UF);<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
- Instruction *NewLI;<br>
+ Value *NewLI;<br>
if (CreateGatherScatter) {<br>
Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;<br>
- NewLI = Builder.CreateMaskedGather(<wbr>VectorGep[Part], Alignment, MaskPart,<br>
+ Value *VectorGep = getOrCreateVectorValue(Ptr, Part);<br>
+ NewLI = Builder.CreateMaskedGather(<wbr>VectorGep, Alignment, MaskPart,<br>
nullptr, "wide.masked.gather");<br>
- Entry[Part] = NewLI;<br>
} else {<br>
// Calculate the pointer for the specific unroll-part.<br>
Value *PartPtr =<br>
@@ -3073,11 +3085,12 @@ void InnerLoopVectorizer::<wbr>vectorizeMemor<br>
"wide.masked.load");<br>
else<br>
NewLI = Builder.CreateAlignedLoad(<wbr>VecPtr, Alignment, "wide.load");<br>
- Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;<br>
+ if (Reverse)<br>
+ NewLI = reverseVector(NewLI);<br></blockquote><div><br></div><div>This updates NewLI to point at a non-load instruction...</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
addMetadata(NewLI, LI);<br></blockquote><div><br></div><div>... and then this copies load metadata to the non-load instruction.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ VectorLoopValueMap.<wbr>setVectorValue(Instr, Part, NewLI);<br>
}<br>
- VectorLoopValueMap.initVector(<wbr>Instr, Entry);<br>
}<br>
<br>
void InnerLoopVectorizer::<wbr>scalarizeInstruction(<wbr>Instruction *Instr,<br>
@@ -3094,9 +3107,6 @@ void InnerLoopVectorizer::<wbr>scalarizeInstr<br>
// Does this instruction return a value ?<br>
bool IsVoidRetTy = Instr->getType()->isVoidTy();<br>
<br>
- // Initialize a new scalar map entry.<br>
- ScalarParts Entry(UF);<br>
-<br>
VectorParts Cond;<br>
if (IfPredicateInstr)<br>
Cond = createBlockInMask(Instr-><wbr>getParent());<br>
@@ -3108,7 +3118,6 @@ void InnerLoopVectorizer::<wbr>scalarizeInstr<br>
<br>
// For each vector unroll 'part':<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
- Entry[Part].resize(VF);<br>
// For each scalar that we create:<br>
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {<br>
<br>
@@ -3129,7 +3138,7 @@ void InnerLoopVectorizer::<wbr>scalarizeInstr<br>
// Replace the operands of the cloned instructions with their scalar<br>
// equivalents in the new loop.<br>
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {<br>
- auto *NewOp = getScalarValue(Instr-><wbr>getOperand(op), Part, Lane);<br>
+ auto *NewOp = getOrCreateScalarValue(Instr-><wbr>getOperand(op), Part, Lane);<br>
Cloned->setOperand(op, NewOp);<br>
}<br>
addNewMetadata(Cloned, Instr);<br>
@@ -3138,7 +3147,7 @@ void InnerLoopVectorizer::<wbr>scalarizeInstr<br>
Builder.Insert(Cloned);<br>
<br>
// Add the cloned scalar to the scalar map entry.<br>
- Entry[Part][Lane] = Cloned;<br>
+ VectorLoopValueMap.<wbr>setScalarValue(Instr, Part, Lane, Cloned);<br>
<br>
// If we just cloned a new assumption, add it the assumption cache.<br>
if (auto *II = dyn_cast<IntrinsicInst>(<wbr>Cloned))<br>
@@ -3150,7 +3159,6 @@ void InnerLoopVectorizer::<wbr>scalarizeInstr<br>
PredicatedInstructions.push_<wbr>back(std::make_pair(Cloned, Cmp));<br>
}<br>
}<br>
- VectorLoopValueMap.initScalar(<wbr>Instr, Entry);<br>
}<br>
<br>
PHINode *InnerLoopVectorizer::<wbr>createInductionVariable(Loop *L, Value *Start,<br>
@@ -3786,10 +3794,10 @@ void InnerLoopVectorizer::<wbr>truncateToMini<br>
// If the value wasn't vectorized, we must maintain the original scalar<br>
// type. The absence of the value from VectorLoopValueMap indicates that it<br>
// wasn't vectorized.<br>
- if (!VectorLoopValueMap.<wbr>hasVector(KV.first))<br>
+ if (!VectorLoopValueMap.<wbr>hasAnyVectorValue(KV.first))<br>
continue;<br>
- VectorParts &Parts = VectorLoopValueMap.getVector(<wbr>KV.first);<br>
- for (Value *&I : Parts) {<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *I = getOrCreateVectorValue(KV.<wbr>first, Part);<br>
if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))<br>
continue;<br>
Type *OriginalTy = I->getType();<br>
@@ -3878,7 +3886,7 @@ void InnerLoopVectorizer::<wbr>truncateToMini<br>
I->replaceAllUsesWith(Res);<br>
cast<Instruction>(I)-><wbr>eraseFromParent();<br>
Erased.insert(I);<br>
- I = Res;<br>
+ VectorLoopValueMap.<wbr>resetVectorValue(KV.first, Part, Res);<br>
}<br>
}<br>
<br>
@@ -3887,15 +3895,15 @@ void InnerLoopVectorizer::<wbr>truncateToMini<br>
// If the value wasn't vectorized, we must maintain the original scalar<br>
// type. The absence of the value from VectorLoopValueMap indicates that it<br>
// wasn't vectorized.<br>
- if (!VectorLoopValueMap.<wbr>hasVector(KV.first))<br>
+ if (!VectorLoopValueMap.<wbr>hasAnyVectorValue(KV.first))<br>
continue;<br>
- VectorParts &Parts = VectorLoopValueMap.getVector(<wbr>KV.first);<br>
- for (Value *&I : Parts) {<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *I = getOrCreateVectorValue(KV.<wbr>first, Part);<br>
ZExtInst *Inst = dyn_cast<ZExtInst>(I);<br>
if (Inst && Inst->use_empty()) {<br>
Value *NewI = Inst->getOperand(0);<br>
Inst->eraseFromParent();<br>
- I = NewI;<br>
+ VectorLoopValueMap.<wbr>resetVectorValue(KV.first, Part, NewI);<br>
}<br>
}<br>
}<br>
@@ -4025,8 +4033,8 @@ void InnerLoopVectorizer::<wbr>fixFirstOrderR<br>
<br>
// We constructed a temporary phi node in the first phase of vectorization.<br>
// This phi node will eventually be deleted.<br>
- VectorParts &PhiParts = VectorLoopValueMap.getVector(<wbr>Phi);<br>
- Builder.SetInsertPoint(cast<<wbr>Instruction>(PhiParts[0]));<br>
+ Builder.SetInsertPoint(<br>
+ cast<Instruction>(<wbr>VectorLoopValueMap.<wbr>getVectorValue(Phi, 0)));<br>
<br>
// Create a phi node for the new recurrence. The current value will either be<br>
// the initial value inserted into a vector or loop-varying vector value.<br>
@@ -4034,19 +4042,19 @@ void InnerLoopVectorizer::<wbr>fixFirstOrderR<br>
VecPhi->addIncoming(<wbr>VectorInit, LoopVectorPreHeader);<br>
<br>
// Get the vectorized previous value.<br>
- auto &PreviousParts = getVectorValue(Previous);<br>
+ Value *PreviousLastPart = getOrCreateVectorValue(<wbr>Previous, UF - 1);<br>
<br>
// Set the insertion point after the previous value if it is an instruction.<br>
// Note that the previous value may have been constant-folded so it is not<br>
// guaranteed to be an instruction in the vector loop. Also, if the previous<br>
// value is a phi node, we should insert after all the phi nodes to avoid<br>
// breaking basic block verification.<br>
- if (LI->getLoopFor(<wbr>LoopVectorBody)-><wbr>isLoopInvariant(PreviousParts[<wbr>UF - 1]) ||<br>
- isa<PHINode>(PreviousParts[UF - 1]))<br>
+ if (LI->getLoopFor(<wbr>LoopVectorBody)-><wbr>isLoopInvariant(<wbr>PreviousLastPart) ||<br>
+ isa<PHINode>(PreviousLastPart)<wbr>)<br>
Builder.SetInsertPoint(&*<wbr>LoopVectorBody-><wbr>getFirstInsertionPt());<br>
else<br>
Builder.SetInsertPoint(<br>
- &*++BasicBlock::iterator(cast<<wbr>Instruction>(PreviousParts[UF - 1])));<br>
+ &*++BasicBlock::iterator(cast<<wbr>Instruction>(PreviousLastPart)<wbr>));<br>
<br>
// We will construct a vector for the recurrence by combining the values for<br>
// the current and previous iterations. This is the required shuffle mask.<br>
@@ -4061,15 +4069,16 @@ void InnerLoopVectorizer::<wbr>fixFirstOrderR<br>
<br>
// Shuffle the current and previous vector and update the vector parts.<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *PreviousPart = getOrCreateVectorValue(<wbr>Previous, Part);<br>
+ Value *PhiPart = VectorLoopValueMap.<wbr>getVectorValue(Phi, Part);<br>
auto *Shuffle =<br>
- VF > 1<br>
- ? Builder.CreateShuffleVector(<wbr>Incoming, PreviousParts[Part],<br>
- ConstantVector::get(<wbr>ShuffleMask))<br>
- : Incoming;<br>
- PhiParts[Part]-><wbr>replaceAllUsesWith(Shuffle);<br>
- cast<Instruction>(PhiParts[<wbr>Part])->eraseFromParent();<br>
- PhiParts[Part] = Shuffle;<br>
- Incoming = PreviousParts[Part];<br>
+ VF > 1 ? Builder.CreateShuffleVector(<wbr>Incoming, PreviousPart,<br>
+ ConstantVector::get(<wbr>ShuffleMask))<br>
+ : Incoming;<br>
+ PhiPart->replaceAllUsesWith(<wbr>Shuffle);<br>
+ cast<Instruction>(PhiPart)-><wbr>eraseFromParent();<br>
+ VectorLoopValueMap.<wbr>resetVectorValue(Phi, Part, Shuffle);<br>
+ Incoming = PreviousPart;<br>
}<br>
<br>
// Fix the latch value of the new recurrence in the vector loop.<br>
@@ -4097,7 +4106,7 @@ void InnerLoopVectorizer::<wbr>fixFirstOrderR<br>
// `Incoming`. This is analogous to the vectorized case above: extracting the<br>
// second last element when VF > 1.<br>
else if (UF > 1)<br>
- ExtractForPhiUsedOutsideLoop = PreviousParts[UF - 2];<br>
+ ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(<wbr>Previous, UF - 2);<br>
<br>
// Fix the initial value of the original recurrence in the scalar loop.<br>
Builder.SetInsertPoint(&*<wbr>LoopScalarPreHeader->begin());<br>
@@ -4148,8 +4157,7 @@ void InnerLoopVectorizer::<wbr>fixReduction(P<br>
Builder.SetInsertPoint(<wbr>LoopBypassBlocks[1]-><wbr>getTerminator());<br>
<br>
// This is the vector-clone of the value that leaves the loop.<br>
- const VectorParts &VectorExit = getVectorValue(LoopExitInst);<br>
- Type *VecTy = VectorExit[0]->getType();<br>
+ Type *VecTy = getOrCreateVectorValue(<wbr>LoopExitInst, 0)->getType();<br>
<br>
// Find the reduction identity variable. Zero for addition, or, xor,<br>
// one for multiplication, -1 for And.<br>
@@ -4187,18 +4195,17 @@ void InnerLoopVectorizer::<wbr>fixReduction(P<br>
<br>
// Reductions do not have to start at zero. They can start with<br>
// any loop invariant values.<br>
- const VectorParts &VecRdxPhi = getVectorValue(Phi);<br>
BasicBlock *Latch = OrigLoop->getLoopLatch();<br>
Value *LoopVal = Phi->getIncomingValueForBlock(<wbr>Latch);<br>
- const VectorParts &Val = getVectorValue(LoopVal);<br>
- for (unsigned part = 0; part < UF; ++part) {<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part);<br>
+ Value *Val = getOrCreateVectorValue(<wbr>LoopVal, Part);<br>
// Make sure to add the reduction start value only to the<br>
// first unroll part.<br>
- Value *StartVal = (part == 0) ? VectorStart : Identity;<br>
- cast<PHINode>(VecRdxPhi[part])<br>
- ->addIncoming(StartVal, LoopVectorPreHeader);<br>
- cast<PHINode>(VecRdxPhi[part])<br>
- ->addIncoming(Val[part], LI->getLoopFor(LoopVectorBody)<wbr>->getLoopLatch());<br>
+ Value *StartVal = (Part == 0) ? VectorStart : Identity;<br>
+ cast<PHINode>(VecRdxPhi)-><wbr>addIncoming(StartVal, LoopVectorPreHeader);<br>
+ cast<PHINode>(VecRdxPhi)<br>
+ ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)<wbr>->getLoopLatch());<br>
}<br>
<br>
// Before each round, move the insertion point right between<br>
@@ -4207,7 +4214,6 @@ void InnerLoopVectorizer::<wbr>fixReduction(P<br>
// instructions.<br>
Builder.SetInsertPoint(&*<wbr>LoopMiddleBlock-><wbr>getFirstInsertionPt());<br>
<br>
- VectorParts &RdxParts = VectorLoopValueMap.getVector(<wbr>LoopExitInst);<br>
setDebugLocFromInst(Builder, LoopExitInst);<br>
<br>
// If the vector reduction can be performed in a smaller type, we truncate<br>
@@ -4216,37 +4222,42 @@ void InnerLoopVectorizer::<wbr>fixReduction(P<br>
if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {<br>
Type *RdxVecTy = VectorType::get(RdxDesc.<wbr>getRecurrenceType(), VF);<br>
Builder.SetInsertPoint(<wbr>LoopVectorBody->getTerminator(<wbr>));<br>
- for (unsigned part = 0; part < UF; ++part) {<br>
- Value *Trunc = Builder.CreateTrunc(RdxParts[<wbr>part], RdxVecTy);<br>
+ VectorParts RdxParts(UF);<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ RdxParts[Part] = VectorLoopValueMap.<wbr>getVectorValue(LoopExitInst, Part);<br>
+ Value *Trunc = Builder.CreateTrunc(RdxParts[<wbr>Part], RdxVecTy);<br>
Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)<br>
- : Builder.CreateZExt(Trunc, VecTy);<br>
- for (Value::user_iterator UI = RdxParts[part]->user_begin();<br>
- UI != RdxParts[part]->user_end();)<br>
+ : Builder.CreateZExt(Trunc, VecTy);<br>
+ for (Value::user_iterator UI = RdxParts[Part]->user_begin();<br>
+ UI != RdxParts[Part]->user_end();)<br>
if (*UI != Trunc) {<br>
- (*UI++)->replaceUsesOfWith(<wbr>RdxParts[part], Extnd);<br>
- RdxParts[part] = Extnd;<br>
+ (*UI++)->replaceUsesOfWith(<wbr>RdxParts[Part], Extnd);<br>
+ RdxParts[Part] = Extnd;<br>
} else {<br>
++UI;<br>
}<br>
}<br>
Builder.SetInsertPoint(&*<wbr>LoopMiddleBlock-><wbr>getFirstInsertionPt());<br>
- for (unsigned part = 0; part < UF; ++part)<br>
- RdxParts[part] = Builder.CreateTrunc(RdxParts[<wbr>part], RdxVecTy);<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ RdxParts[Part] = Builder.CreateTrunc(RdxParts[<wbr>Part], RdxVecTy);<br>
+ VectorLoopValueMap.<wbr>resetVectorValue(LoopExitInst, Part, RdxParts[Part]);<br>
+ }<br>
}<br>
<br>
// Reduce all of the unrolled parts into a single vector.<br>
- Value *ReducedPartRdx = RdxParts[0];<br>
+ Value *ReducedPartRdx = VectorLoopValueMap.<wbr>getVectorValue(LoopExitInst, 0);<br>
unsigned Op = RecurrenceDescriptor::<wbr>getRecurrenceBinOp(RK);<br>
setDebugLocFromInst(Builder, ReducedPartRdx);<br>
- for (unsigned part = 1; part < UF; ++part) {<br>
+ for (unsigned Part = 1; Part < UF; ++Part) {<br>
+ Value *RdxPart = VectorLoopValueMap.<wbr>getVectorValue(LoopExitInst, Part);<br>
if (Op != Instruction::ICmp && Op != Instruction::FCmp)<br>
// Floating point operations had to be 'fast' to enable the reduction.<br>
ReducedPartRdx = addFastMathFlag(<br>
- Builder.CreateBinOp((<wbr>Instruction::BinaryOps)Op, RdxParts[part],<br>
+ Builder.CreateBinOp((<wbr>Instruction::BinaryOps)Op, RdxPart,<br>
ReducedPartRdx, "bin.rdx"));<br>
else<br>
ReducedPartRdx = RecurrenceDescriptor::<wbr>createMinMaxOp(<br>
- Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);<br>
+ Builder, MinMaxKind, ReducedPartRdx, RdxPart);<br>
}<br>
<br>
if (VF > 1) {<br>
@@ -4518,14 +4529,16 @@ InnerLoopVectorizer::<wbr>createEdgeMask(Basi<br>
assert(BI && "Unexpected terminator found");<br>
<br>
if (BI->isConditional()) {<br>
- VectorParts EdgeMask = getVectorValue(BI-><wbr>getCondition());<br>
<br>
- if (BI->getSuccessor(0) != Dst)<br>
- for (unsigned part = 0; part < UF; ++part)<br>
- EdgeMask[part] = Builder.CreateNot(EdgeMask[<wbr>part]);<br>
+ VectorParts EdgeMask(UF);<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ auto *EdgeMaskPart = getOrCreateVectorValue(BI-><wbr>getCondition(), Part);<br>
+ if (BI->getSuccessor(0) != Dst)<br>
+ EdgeMaskPart = Builder.CreateNot(<wbr>EdgeMaskPart);<br>
<br>
- for (unsigned part = 0; part < UF; ++part)<br>
- EdgeMask[part] = Builder.CreateAnd(EdgeMask[<wbr>part], SrcMask[part]);<br>
+ EdgeMaskPart = Builder.CreateAnd(<wbr>EdgeMaskPart, SrcMask[Part]);<br>
+ EdgeMask[Part] = EdgeMaskPart;<br>
+ }<br>
<br>
EdgeMaskCache[Edge] = EdgeMask;<br>
return EdgeMask;<br>
@@ -4544,23 +4557,27 @@ InnerLoopVectorizer::<wbr>createBlockInMask(B<br>
if (BCEntryIt != BlockMaskCache.end())<br>
return BCEntryIt->second;<br>
<br>
+ VectorParts BlockMask(UF);<br>
+<br>
// Loop incoming mask is all-one.<br>
if (OrigLoop->getHeader() == BB) {<br>
Value *C = ConstantInt::get(IntegerType::<wbr>getInt1Ty(BB->getContext()), 1);<br>
- const VectorParts &BlockMask = getVectorValue(C);<br>
+ for (unsigned Part = 0; Part < UF; ++Part)<br>
+ BlockMask[Part] = getOrCreateVectorValue(C, Part);<br>
BlockMaskCache[BB] = BlockMask;<br>
return BlockMask;<br>
}<br>
<br>
// This is the block mask. We OR all incoming edges, and with zero.<br>
Value *Zero = ConstantInt::get(IntegerType::<wbr>getInt1Ty(BB->getContext()), 0);<br>
- VectorParts BlockMask = getVectorValue(Zero);<br>
+ for (unsigned Part = 0; Part < UF; ++Part)<br>
+ BlockMask[Part] = getOrCreateVectorValue(Zero, Part);<br>
<br>
// For each pred:<br>
- for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) {<br>
- VectorParts EM = createEdgeMask(*it, BB);<br>
- for (unsigned part = 0; part < UF; ++part)<br>
- BlockMask[part] = Builder.CreateOr(BlockMask[<wbr>part], EM[part]);<br>
+ for (pred_iterator It = pred_begin(BB), E = pred_end(BB); It != E; ++It) {<br>
+ VectorParts EM = createEdgeMask(*It, BB);<br>
+ for (unsigned Part = 0; Part < UF; ++Part)<br>
+ BlockMask[Part] = Builder.CreateOr(BlockMask[<wbr>Part], EM[Part]);<br>
}<br>
<br>
BlockMaskCache[BB] = BlockMask;<br>
@@ -4575,15 +4592,14 @@ void InnerLoopVectorizer::<wbr>widenPHIInstru<br>
// stage #1: We create a new vector PHI node with no incoming edges. We'll use<br>
// this value when we vectorize all of the instructions that use the PHI.<br>
if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(<wbr>P)) {<br>
- VectorParts Entry(UF);<br>
- for (unsigned part = 0; part < UF; ++part) {<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
// This is phase one of vectorizing PHIs.<br>
Type *VecTy =<br>
(VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);<br>
- Entry[part] = PHINode::Create(<br>
+ Value *EntryPart = PHINode::Create(<br>
VecTy, 2, "vec.phi", &*LoopVectorBody-><wbr>getFirstInsertionPt());<br>
+ VectorLoopValueMap.<wbr>setVectorValue(P, Part, EntryPart);<br>
}<br>
- VectorLoopValueMap.initVector(<wbr>P, Entry);<br>
return;<br>
}<br>
<br>
@@ -4607,21 +4623,22 @@ void InnerLoopVectorizer::<wbr>widenPHIInstru<br>
for (unsigned In = 0; In < NumIncoming; In++) {<br>
VectorParts Cond =<br>
createEdgeMask(P-><wbr>getIncomingBlock(In), P->getParent());<br>
- const VectorParts &In0 = getVectorValue(P-><wbr>getIncomingValue(In));<br>
<br>
- for (unsigned part = 0; part < UF; ++part) {<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *In0 = getOrCreateVectorValue(P-><wbr>getIncomingValue(In), Part);<br>
// We might have single edge PHIs (blocks) - use an identity<br>
// 'select' for the first PHI operand.<br>
if (In == 0)<br>
- Entry[part] = Builder.CreateSelect(Cond[<wbr>part], In0[part], In0[part]);<br>
+ Entry[Part] = Builder.CreateSelect(Cond[<wbr>Part], In0, In0);<br>
else<br>
// Select between the current value and the previous incoming edge<br>
// based on the incoming mask.<br>
- Entry[part] = Builder.CreateSelect(Cond[<wbr>part], In0[part], Entry[part],<br>
+ Entry[Part] = Builder.CreateSelect(Cond[<wbr>Part], In0, Entry[Part],<br>
"predphi");<br>
}<br>
}<br>
- VectorLoopValueMap.initVector(<wbr>P, Entry);<br>
+ for (unsigned Part = 0; Part < UF; ++Part)<br>
+ VectorLoopValueMap.<wbr>setVectorValue(P, Part, Entry[Part]);<br>
return;<br>
}<br>
<br>
@@ -4652,18 +4669,15 @@ void InnerLoopVectorizer::<wbr>widenPHIInstru<br>
unsigned Lanes = Cost-><wbr>isUniformAfterVectorization(P, VF) ? 1 : VF;<br>
// These are the scalar results. Notice that we don't generate vector GEPs<br>
// because scalar GEPs result in better code.<br>
- ScalarParts Entry(UF);<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
- Entry[Part].resize(VF);<br>
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {<br>
Constant *Idx = ConstantInt::get(PtrInd-><wbr>getType(), Lane + Part * VF);<br>
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);<br>
Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);<br>
SclrGep->setName("next.gep");<br>
- Entry[Part][Lane] = SclrGep;<br>
+ VectorLoopValueMap.<wbr>setScalarValue(P, Part, Lane, SclrGep);<br>
}<br>
}<br>
- VectorLoopValueMap.initScalar(<wbr>P, Entry);<br>
return;<br>
}<br>
}<br>
@@ -4713,7 +4727,6 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
// is vector-typed. Thus, to keep the representation compact, we only use<br>
// vector-typed operands for loop-varying values.<br>
auto *GEP = cast<GetElementPtrInst>(&I);<br>
- VectorParts Entry(UF);<br>
<br>
if (VF > 1 && OrigLoop-><wbr>hasLoopInvariantOperands(GEP)) {<br>
// If we are vectorizing, but the GEP has only loop-invariant operands,<br>
@@ -4729,8 +4742,11 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
// collectLoopScalars() and teach getVectorValue() to broadcast<br>
// the lane-zero scalar value.<br>
auto *Clone = Builder.Insert(GEP->clone());<br>
- for (unsigned Part = 0; Part < UF; ++Part)<br>
- Entry[Part] = Builder.CreateVectorSplat(VF, Clone);<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);<br>
+ VectorLoopValueMap.<wbr>setVectorValue(&I, Part, EntryPart);<br>
+ addMetadata(EntryPart, GEP);<br>
+ }<br>
} else {<br>
// If the GEP has at least one loop-varying operand, we are sure to<br>
// produce a vector of pointers. But if we are only unrolling, we want<br>
@@ -4743,9 +4759,10 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
<br>
// The pointer operand of the new GEP. If it's loop-invariant, we<br>
// won't broadcast it.<br>
- auto *Ptr = OrigLoop->isLoopInvariant(GEP-<wbr>>getPointerOperand())<br>
- ? GEP->getPointerOperand()<br>
- : getVectorValue(GEP-><wbr>getPointerOperand())[Part];<br>
+ auto *Ptr =<br>
+ OrigLoop->isLoopInvariant(GEP-<wbr>>getPointerOperand())<br>
+ ? GEP->getPointerOperand()<br>
+ : getOrCreateVectorValue(GEP-><wbr>getPointerOperand(), Part);<br>
<br>
// Collect all the indices for the new GEP. If any index is<br>
// loop-invariant, we won't broadcast it.<br>
@@ -4754,7 +4771,7 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
if (OrigLoop->isLoopInvariant(U.<wbr>get()))<br>
Indices.push_back(U.get());<br>
else<br>
- Indices.push_back(<wbr>getVectorValue(U.get())[Part])<wbr>;<br>
+ Indices.push_back(<wbr>getOrCreateVectorValue(U.get()<wbr>, Part));<br>
}<br>
<br>
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,<br>
@@ -4764,12 +4781,11 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
: Builder.CreateGEP(Ptr, Indices);<br>
assert((VF == 1 || NewGEP->getType()->isVectorTy(<wbr>)) &&<br>
"NewGEP is not a pointer vector");<br>
- Entry[Part] = NewGEP;<br>
+ VectorLoopValueMap.<wbr>setVectorValue(&I, Part, NewGEP);<br>
+ addMetadata(NewGEP, GEP);<br>
}<br>
}<br>
<br>
- VectorLoopValueMap.initVector(<wbr>&I, Entry);<br>
- addMetadata(Entry, GEP);<br>
break;<br>
}<br>
case Instruction::UDiv:<br>
@@ -4800,22 +4816,20 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
// Just widen binops.<br>
auto *BinOp = cast<BinaryOperator>(&I);<br>
setDebugLocFromInst(Builder, BinOp);<br>
- const VectorParts &A = getVectorValue(BinOp-><wbr>getOperand(0));<br>
- const VectorParts &B = getVectorValue(BinOp-><wbr>getOperand(1));<br>
<br>
- // Use this vector value for all users of the original instruction.<br>
- VectorParts Entry(UF);<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
- Value *V = Builder.CreateBinOp(BinOp-><wbr>getOpcode(), A[Part], B[Part]);<br>
+ Value *A = getOrCreateVectorValue(BinOp-><wbr>getOperand(0), Part);<br>
+ Value *B = getOrCreateVectorValue(BinOp-><wbr>getOperand(1), Part);<br>
+ Value *V = Builder.CreateBinOp(BinOp-><wbr>getOpcode(), A, B);<br>
<br>
if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))<br>
VecOp->copyIRFlags(BinOp);<br>
<br>
- Entry[Part] = V;<br>
+ // Use this vector value for all users of the original instruction.<br>
+ VectorLoopValueMap.<wbr>setVectorValue(&I, Part, V);<br>
+ addMetadata(V, BinOp);<br>
}<br>
<br>
- VectorLoopValueMap.initVector(<wbr>&I, Entry);<br>
- addMetadata(Entry, BinOp);<br>
break;<br>
}<br>
case Instruction::Select: {<br>
@@ -4831,20 +4845,19 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
// loop. This means that we can't just use the original 'cond' value.<br>
// We have to take the 'vectorized' value and pick the first lane.<br>
// Instcombine will make this a no-op.<br>
- const VectorParts &Cond = getVectorValue(I.getOperand(0)<wbr>);<br>
- const VectorParts &Op0 = getVectorValue(I.getOperand(1)<wbr>);<br>
- const VectorParts &Op1 = getVectorValue(I.getOperand(2)<wbr>);<br>
<br>
- auto *ScalarCond = getScalarValue(I.getOperand(0)<wbr>, 0, 0);<br>
+ auto *ScalarCond = getOrCreateScalarValue(I.<wbr>getOperand(0), 0, 0);<br>
<br>
- VectorParts Entry(UF);<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
- Entry[Part] = Builder.CreateSelect(<br>
- InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);<br>
+ Value *Cond = getOrCreateVectorValue(I.<wbr>getOperand(0), Part);<br>
+ Value *Op0 = getOrCreateVectorValue(I.<wbr>getOperand(1), Part);<br>
+ Value *Op1 = getOrCreateVectorValue(I.<wbr>getOperand(2), Part);<br>
+ Value *Sel =<br>
+ Builder.CreateSelect(<wbr>InvariantCond ? ScalarCond : Cond, Op0, Op1);<br>
+ VectorLoopValueMap.<wbr>setVectorValue(&I, Part, Sel);<br>
+ addMetadata(Sel, &I);<br>
}<br>
<br>
- VectorLoopValueMap.initVector(<wbr>&I, Entry);<br>
- addMetadata(Entry, &I);<br>
break;<br>
}<br>
<br>
@@ -4854,22 +4867,20 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
bool FCmp = (I.getOpcode() == Instruction::FCmp);<br>
auto *Cmp = dyn_cast<CmpInst>(&I);<br>
setDebugLocFromInst(Builder, Cmp);<br>
- const VectorParts &A = getVectorValue(Cmp-><wbr>getOperand(0));<br>
- const VectorParts &B = getVectorValue(Cmp-><wbr>getOperand(1));<br>
- VectorParts Entry(UF);<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *A = getOrCreateVectorValue(Cmp-><wbr>getOperand(0), Part);<br>
+ Value *B = getOrCreateVectorValue(Cmp-><wbr>getOperand(1), Part);<br>
Value *C = nullptr;<br>
if (FCmp) {<br>
- C = Builder.CreateFCmp(Cmp-><wbr>getPredicate(), A[Part], B[Part]);<br>
+ C = Builder.CreateFCmp(Cmp-><wbr>getPredicate(), A, B);<br>
cast<FCmpInst>(C)-><wbr>copyFastMathFlags(Cmp);<br>
} else {<br>
- C = Builder.CreateICmp(Cmp-><wbr>getPredicate(), A[Part], B[Part]);<br>
+ C = Builder.CreateICmp(Cmp-><wbr>getPredicate(), A, B);<br>
}<br>
- Entry[Part] = C;<br>
+ VectorLoopValueMap.<wbr>setVectorValue(&I, Part, C);<br>
+ addMetadata(C, &I);<br>
}<br>
<br>
- VectorLoopValueMap.initVector(<wbr>&I, Entry);<br>
- addMetadata(Entry, &I);<br>
break;<br>
}<br>
<br>
@@ -4906,12 +4917,12 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
Type *DestTy =<br>
(VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);<br>
<br>
- const VectorParts &A = getVectorValue(CI->getOperand(<wbr>0));<br>
- VectorParts Entry(UF);<br>
- for (unsigned Part = 0; Part < UF; ++Part)<br>
- Entry[Part] = Builder.CreateCast(CI-><wbr>getOpcode(), A[Part], DestTy);<br>
- VectorLoopValueMap.initVector(<wbr>&I, Entry);<br>
- addMetadata(Entry, &I);<br>
+ for (unsigned Part = 0; Part < UF; ++Part) {<br>
+ Value *A = getOrCreateVectorValue(CI-><wbr>getOperand(0), Part);<br>
+ Value *Cast = Builder.CreateCast(CI-><wbr>getOpcode(), A, DestTy);<br>
+ VectorLoopValueMap.<wbr>setVectorValue(&I, Part, Cast);<br>
+ addMetadata(Cast, &I);<br>
+ }<br>
break;<br>
}<br>
<br>
@@ -4949,17 +4960,14 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
break;<br>
}<br>
<br>
- VectorParts Entry(UF);<br>
for (unsigned Part = 0; Part < UF; ++Part) {<br>
SmallVector<Value *, 4> Args;<br>
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {<br>
Value *Arg = CI->getArgOperand(i);<br>
// Some intrinsics have a scalar argument - don't replace it with a<br>
// vector.<br>
- if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(<wbr>ID, i)) {<br>
- const VectorParts &VectorArg = getVectorValue(CI-><wbr>getArgOperand(i));<br>
- Arg = VectorArg[Part];<br>
- }<br>
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(<wbr>ID, i))<br>
+ Arg = getOrCreateVectorValue(CI-><wbr>getArgOperand(i), Part);<br>
Args.push_back(Arg);<br>
}<br>
<br>
@@ -4992,11 +5000,10 @@ void InnerLoopVectorizer::<wbr>vectorizeInstr<br>
if (isa<FPMathOperator>(V))<br>
V->copyFastMathFlags(CI);<br>
<br>
- Entry[Part] = V;<br>
+ VectorLoopValueMap.<wbr>setVectorValue(&I, Part, V);<br>
+ addMetadata(V, &I);<br>
}<br>
<br>
- VectorLoopValueMap.initVector(<wbr>&I, Entry);<br>
- addMetadata(Entry, &I);<br>
break;<br>
}<br>
<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>