[llvm] r323447 - Revert "[SLP] Fix for PR32086: Count InsertElementInstr of the same elements as shuffle."
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 29 09:44:53 PST 2018
Philip,
Thanks, will do this next time.
-------------
Best regards,
Alexey Bataev
29.01.2018 12:40, Philip Reames wrote:
> Alexey,
>
> It is customary when reverting a patch to reply to the commit thread
> stating the patch has been reverted and with what commit ID. It is
> also customary to include a brief summary of the bugs fixed when
> resubmitting. This makes it easier for the broader community to
> follow along and see the progress being made.
>
> Philip
>
>
> On 01/25/2018 09:28 AM, Alexey Bataev via llvm-commits wrote:
>> Author: abataev
>> Date: Thu Jan 25 09:28:12 2018
>> New Revision: 323447
>>
>> URL:
>> https://nam04.safelinks.protection.outlook.com/?url=http%3A%2F%2Fllvm.org%2Fviewvc%2Fllvm-project%3Frev%3D323447%26view%3Drev&data=02%7C01%7C%7Ca0ca24dbbe9544e0eaaa08d5673f6a31%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C636528444413469003&sdata=mvrIotuGH63pZLvrxjMYqfgaRVHDtE51kdUXq0faxRY%3D&reserved=0
>> Log:
>> Revert "[SLP] Fix for PR32086: Count InsertElementInstr of the same
>> elements as shuffle."
>>
>> This reverts commit r323441 to fix buildbots.
>>
>> Modified:
>> llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll
>>
>> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> URL:
>> https://nam04.safelinks.protection.outlook.com/?url=http%3A%2F%2Fllvm.org%2Fviewvc%2Fllvm-project%2Fllvm%2Ftrunk%2Flib%2FTransforms%2FVectorize%2FSLPVectorizer.cpp%3Frev%3D323447%26r1%3D323446%26r2%3D323447%26view%3Ddiff&data=02%7C01%7C%7Ca0ca24dbbe9544e0eaaa08d5673f6a31%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C636528444413469003&sdata=eVEC4BxzFysQxlVx5tkiAsOQrKbkjNClD6KMjzvY0L4%3D&reserved=0
>> ==============================================================================
>>
>> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Thu Jan 25
>> 09:28:12 2018
>> @@ -662,9 +662,13 @@ private:
>> /// Vectorize a single entry in the tree, starting in \p VL.
>> Value *vectorizeTree(ArrayRef<Value *> VL);
>> + /// \returns the pointer to the vectorized value if \p VL is
>> already
>> + /// vectorized, or NULL. They may happen in cycles.
>> + Value *alreadyVectorized(ArrayRef<Value *> VL, Value *OpValue) const;
>> +
>> /// \returns the scalarization cost for this type. Scalarization
>> in this
>> /// context means the creation of vectors from a group of scalars.
>> - int getGatherCost(Type *Ty, const DenseSet<unsigned>
>> &ShuffledIndices);
>> + int getGatherCost(Type *Ty);
>> /// \returns the scalarization cost for this list of values.
>> Assuming that
>> /// this subtree gets vectorized, we may need to extract the
>> values from the
>> @@ -698,12 +702,8 @@ private:
>> /// \returns true if the scalars in VL are equal to this entry.
>> bool isSame(ArrayRef<Value *> VL) const {
>> - if (VL.size() == Scalars.size())
>> - return std::equal(VL.begin(), VL.end(), Scalars.begin());
>> - return VL.size() == ReuseShuffleIndices.size() &&
>> - std::equal(
>> - VL.begin(), VL.end(), ReuseShuffleIndices.begin(),
>> - [this](Value *V, unsigned Idx) { return V ==
>> Scalars[Idx]; });
>> + assert(VL.size() == Scalars.size() && "Invalid size");
>> + return std::equal(VL.begin(), VL.end(), Scalars.begin());
>> }
>> /// A vector of scalars.
>> @@ -715,9 +715,6 @@ private:
>> /// Do we need to gather this sequence ?
>> bool NeedToGather = false;
>> - /// Does this sequence require some shuffling?
>> - SmallVector<unsigned, 4> ReuseShuffleIndices;
>> -
>> /// Points back to the VectorizableTree.
>> ///
>> /// Only used for Graphviz right now. Unfortunately
>> GraphTrait::NodeRef has
>> @@ -732,15 +729,13 @@ private:
>> };
>> /// Create a new VectorizableTree entry.
>> - void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int
>> &UserTreeIdx,
>> - ArrayRef<unsigned> ReuseShuffleIndices = None) {
>> + TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
>> + int &UserTreeIdx) {
>> VectorizableTree.emplace_back(VectorizableTree);
>> int idx = VectorizableTree.size() - 1;
>> TreeEntry *Last = &VectorizableTree[idx];
>> Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
>> Last->NeedToGather = !Vectorized;
>> - Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
>> - ReuseShuffleIndices.end());
>> if (Vectorized) {
>> for (int i = 0, e = VL.size(); i != e; ++i) {
>> assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
>> @@ -753,6 +748,7 @@ private:
>> if (UserTreeIdx >= 0)
>> Last->UserTreeIndices.push_back(UserTreeIdx);
>> UserTreeIdx = idx;
>> + return Last;
>> }
>> /// -- Vectorization State --
>> @@ -766,6 +762,13 @@ private:
>> return nullptr;
>> }
>> + const TreeEntry *getTreeEntry(Value *V) const {
>> + auto I = ScalarToTreeEntry.find(V);
>> + if (I != ScalarToTreeEntry.end())
>> + return &VectorizableTree[I->second];
>> + return nullptr;
>> + }
>> +
>> /// Maps a specific scalar to its tree entry.
>> SmallDenseMap<Value*, int> ScalarToTreeEntry;
>> @@ -1429,11 +1432,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> // Check if this is a duplicate of another entry.
>> if (TreeEntry *E = getTreeEntry(S.OpValue)) {
>> - DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
>> - if (!E->isSame(VL)) {
>> - DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
>> - newTreeEntry(VL, false, UserTreeIdx);
>> - return;
>> + for (unsigned i = 0, e = VL.size(); i != e; ++i) {
>> + DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
>> + if (E->Scalars[i] != VL[i]) {
>> + DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
>> + newTreeEntry(VL, false, UserTreeIdx);
>> + return;
>> + }
>> }
>> // Record the reuse of the tree node. FIXME, currently this is
>> only used to
>> // properly draw the graph rather than for the actual
>> vectorization.
>> @@ -1479,26 +1484,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> }
>> // Check that every instruction appears once in this bundle.
>> - SmallVector<unsigned, 4> ReuseShuffleIndicies;
>> - SmallVector<Value *, 4> UniqueValues;
>> - DenseMap<Value *, unsigned> UniquePositions;
>> - for (Value *V : VL) {
>> - auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
>> - ReuseShuffleIndicies.emplace_back(Res.first->second);
>> - if (Res.second)
>> - UniqueValues.emplace_back(V);
>> - }
>> - if (UniqueValues.size() == VL.size()) {
>> - ReuseShuffleIndicies.clear();
>> - } else {
>> - DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
>> - if (UniqueValues.size() <= 1 ||
>> !llvm::isPowerOf2_32(UniqueValues.size())) {
>> - DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
>> - newTreeEntry(VL, false, UserTreeIdx);
>> - return;
>> - }
>> - VL = UniqueValues;
>> - }
>> + for (unsigned i = 0, e = VL.size(); i < e; ++i)
>> + for (unsigned j = i + 1; j < e; ++j)
>> + if (VL[i] == VL[j]) {
>> + DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
>> + newTreeEntry(VL, false, UserTreeIdx);
>> + return;
>> + }
>> auto &BSRef = BlocksSchedules[BB];
>> if (!BSRef)
>> @@ -1506,12 +1498,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> BlockScheduling &BS = *BSRef.get();
>> - if (!BS.tryScheduleBundle(VL, this, VL0)) {
>> + if (!BS.tryScheduleBundle(VL, this, S.OpValue)) {
>> DEBUG(dbgs() << "SLP: We are not able to schedule this
>> bundle!\n");
>> assert((!BS.getScheduleData(VL0) ||
>> !BS.getScheduleData(VL0)->isPartOfBundle()) &&
>> "tryScheduleBundle should cancelScheduling on failure");
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> return;
>> }
>> DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
>> @@ -1530,12 +1522,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> if (Term) {
>> DEBUG(dbgs() << "SLP: Need to swizzle PHINodes
>> (TerminatorInst use).\n");
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> return;
>> }
>> }
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
>> for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e;
>> ++i) {
>> @@ -1553,7 +1545,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> case Instruction::ExtractElement: {
>> bool Reuse = canReuseExtract(VL, VL0);
>> if (Reuse) {
>> - DEBUG(dbgs() << "SLP: Reusing or shuffling extract
>> sequence.\n");
>> + DEBUG(dbgs() << "SLP: Reusing extract sequence.\n");
>> ++NumOpsWantToKeepOrder[S.Opcode];
>> } else {
>> SmallVector<Value *, 4> ReverseVL(VL.rbegin(), VL.rend());
>> @@ -1561,7 +1553,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> --NumOpsWantToKeepOrder[S.Opcode];
>> BS.cancelScheduling(VL, VL0);
>> }
>> - newTreeEntry(VL, Reuse, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, Reuse, UserTreeIdx);
>> return;
>> }
>> case Instruction::Load: {
>> @@ -1576,7 +1568,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> if (DL->getTypeSizeInBits(ScalarTy) !=
>> DL->getTypeAllocSizeInBits(ScalarTy)) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
>> return;
>> }
>> @@ -1587,7 +1579,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> LoadInst *L = cast<LoadInst>(VL[i]);
>> if (!L->isSimple()) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
>> return;
>> }
>> @@ -1609,7 +1601,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> if (Consecutive) {
>> ++NumOpsWantToKeepOrder[S.Opcode];
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: added a vector of loads.\n");
>> return;
>> }
>> @@ -1624,7 +1616,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> }
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> if (ReverseConsecutive) {
>> --NumOpsWantToKeepOrder[S.Opcode];
>> @@ -1651,12 +1643,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
>> if (Ty != SrcTy || !isValidElementType(Ty)) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: Gathering casts with different src
>> types.\n");
>> return;
>> }
>> }
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: added a vector of casts.\n");
>> for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
>> @@ -1679,13 +1671,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> if (Cmp->getPredicate() != P0 ||
>> Cmp->getOperand(0)->getType() != ComparedTy) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: Gathering cmp with different
>> predicate.\n");
>> return;
>> }
>> }
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: added a vector of compares.\n");
>> for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
>> @@ -1717,7 +1709,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> case Instruction::And:
>> case Instruction::Or:
>> case Instruction::Xor:
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
>> // Sort operands of the instructions so that each side is
>> more likely to
>> @@ -1746,7 +1738,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
>> DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested
>> indexes).\n");
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> return;
>> }
>> }
>> @@ -1759,7 +1751,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> if (Ty0 != CurTy) {
>> DEBUG(dbgs() << "SLP: not-vectorizable GEP (different
>> types).\n");
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> return;
>> }
>> }
>> @@ -1771,12 +1763,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> DEBUG(
>> dbgs() << "SLP: not-vectorizable GEP (non-constant
>> indexes).\n");
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> return;
>> }
>> }
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
>> for (unsigned i = 0, e = 2; i < e; ++i) {
>> ValueList Operands;
>> @@ -1793,12 +1785,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
>> if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
>> return;
>> }
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: added a vector of stores.\n");
>> ValueList Operands;
>> @@ -1816,7 +1808,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
>> if (!isTriviallyVectorizable(ID)) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
>> return;
>> }
>> @@ -1830,7 +1822,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
>> !CI->hasIdenticalOperandBundleSchema(*CI2)) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!="
>> << *VL[i]
>> << "\n");
>> return;
>> @@ -1841,7 +1833,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> Value *A1J = CI2->getArgOperand(1);
>> if (A1I != A1J) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: mismatched arguments in call:" <<
>> *CI
>> << " argument "<< A1I<<"!=" << A1J
>> << "\n");
>> @@ -1854,14 +1846,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> CI->op_begin() +
>> CI->getBundleOperandsEndIndex(),
>> CI2->op_begin() +
>> CI2->getBundleOperandsStartIndex())) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: mismatched bundle operands in
>> calls:" << *CI << "!="
>> << *VL[i] << '\n');
>> return;
>> }
>> }
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
>> ValueList Operands;
>> // Prepare the operand vector.
>> @@ -1878,11 +1870,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> // then do not vectorize this instruction.
>> if (!S.IsAltShuffle) {
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
>> return;
>> }
>> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, true, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
>> // Reorder operands if reordering would enable vectorization.
>> @@ -1906,7 +1898,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> default:
>> BS.cancelScheduling(VL, VL0);
>> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
>> + newTreeEntry(VL, false, UserTreeIdx);
>> DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
>> return;
>> }
>> @@ -1999,22 +1991,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> VecTy = VectorType::get(
>> IntegerType::get(F->getContext(), MinBWs[VL[0]].first),
>> VL.size());
>> - unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
>> - bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
>> - int ReuseShuffleCost = 0;
>> - if (NeedToShuffleReuses) {
>> - ReuseShuffleCost =
>> -
>> TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
>> - }
>> if (E->NeedToGather) {
>> if (allConstant(VL))
>> return 0;
>> if (isSplat(VL)) {
>> - return ReuseShuffleCost +
>> - TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
>> VecTy, 0);
>> + return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
>> VecTy, 0);
>> }
>> - if (getSameOpcode(VL).Opcode == Instruction::ExtractElement &&
>> - allSameType(VL) && allSameBlock(VL)) {
>> + if (getSameOpcode(VL).Opcode == Instruction::ExtractElement) {
>> Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
>> isShuffle(VL);
>> if (ShuffleKind.hasValue()) {
>> int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
>> @@ -2031,10 +2014,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> IO->getZExtValue());
>> }
>> }
>> - return ReuseShuffleCost + Cost;
>> + return Cost;
>> }
>> }
>> - return ReuseShuffleCost + getGatherCost(VL);
>> + return getGatherCost(E->Scalars);
>> }
>> InstructionsState S = getSameOpcode(VL);
>> assert(S.Opcode && allSameType(VL) && allSameBlock(VL) &&
>> "Invalid VL");
>> @@ -2047,36 +2030,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> case Instruction::ExtractValue:
>> case Instruction::ExtractElement:
>> - if (NeedToShuffleReuses) {
>> - unsigned Idx = 0;
>> - for (unsigned I : E->ReuseShuffleIndices) {
>> - if (ShuffleOrOp == Instruction::ExtractElement) {
>> - auto *IO = cast<ConstantInt>(
>> - cast<ExtractElementInst>(VL[I])->getIndexOperand());
>> - Idx = IO->getZExtValue();
>> - ReuseShuffleCost -= TTI->getVectorInstrCost(
>> - Instruction::ExtractElement, VecTy, Idx);
>> - } else {
>> - ReuseShuffleCost -= TTI->getVectorInstrCost(
>> - Instruction::ExtractElement, VecTy, Idx);
>> - ++Idx;
>> - }
>> - }
>> - Idx = ReuseShuffleNumbers;
>> - for (Value *V : VL) {
>> - if (ShuffleOrOp == Instruction::ExtractElement) {
>> - auto *IO = cast<ConstantInt>(
>> - cast<ExtractElementInst>(V)->getIndexOperand());
>> - Idx = IO->getZExtValue();
>> - } else {
>> - --Idx;
>> - }
>> - ReuseShuffleCost +=
>> - TTI->getVectorInstrCost(Instruction::ExtractElement,
>> VecTy, Idx);
>> - }
>> - }
>> if (canReuseExtract(VL, S.OpValue)) {
>> - int DeadCost = ReuseShuffleCost;
>> + int DeadCost = 0;
>> for (unsigned i = 0, e = VL.size(); i < e; ++i) {
>> Instruction *E = cast<Instruction>(VL[i]);
>> // If all users are going to be vectorized, instruction
>> can be
>> @@ -2084,12 +2039,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> // The same, if have only one user, it will be vectorized
>> for sure.
>> if (areAllUsersVectorized(E))
>> // Take credit for instruction that will become dead.
>> - DeadCost -=
>> + DeadCost +=
>>
>> TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
>> }
>> - return DeadCost;
>> + return -DeadCost;
>> }
>> - return ReuseShuffleCost + getGatherCost(VL);
>> + return getGatherCost(VecTy);
>> case Instruction::ZExt:
>> case Instruction::SExt:
>> @@ -2104,11 +2059,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> case Instruction::FPTrunc:
>> case Instruction::BitCast: {
>> Type *SrcTy = VL0->getOperand(0)->getType();
>> - if (NeedToShuffleReuses) {
>> - ReuseShuffleCost -=
>> - (ReuseShuffleNumbers - VL.size()) *
>> - TTI->getCastInstrCost(S.Opcode, ScalarTy, SrcTy, VL0);
>> - }
>> // Calculate the cost of this instruction.
>> int ScalarCost = VL.size() *
>> TTI->getCastInstrCost(VL0->getOpcode(),
>> @@ -2117,26 +2067,19 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
>> int VecCost = 0;
>> // Check if the values are candidates to demote.
>> - if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
>> - VecCost = ReuseShuffleCost +
>> - TTI->getCastInstrCost(VL0->getOpcode(), VecTy,
>> SrcVecTy, VL0);
>> - }
>> + if (!MinBWs.count(VL0) || VecTy != SrcVecTy)
>> + VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy,
>> SrcVecTy, VL0);
>> return VecCost - ScalarCost;
>> }
>> case Instruction::FCmp:
>> case Instruction::ICmp:
>> case Instruction::Select: {
>> // Calculate the cost of this instruction.
>> - if (NeedToShuffleReuses) {
>> - ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
>> - TTI->getCmpSelInstrCost(S.Opcode, ScalarTy,
>> -
>> Builder.getInt1Ty(), VL0);
>> - }
>> VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(),
>> VL.size());
>> int ScalarCost = VecTy->getNumElements() *
>> TTI->getCmpSelInstrCost(S.Opcode, ScalarTy,
>> Builder.getInt1Ty(), VL0);
>> int VecCost = TTI->getCmpSelInstrCost(S.Opcode, VecTy,
>> MaskTy, VL0);
>> - return ReuseShuffleCost + VecCost - ScalarCost;
>> + return VecCost - ScalarCost;
>> }
>> case Instruction::Add:
>> case Instruction::FAdd:
>> @@ -2194,19 +2137,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> Op2VP = TargetTransformInfo::OP_PowerOf2;
>> SmallVector<const Value *, 4> Operands(VL0->operand_values());
>> - if (NeedToShuffleReuses) {
>> - ReuseShuffleCost -=
>> - (ReuseShuffleNumbers - VL.size()) *
>> - TTI->getArithmeticInstrCost(S.Opcode, ScalarTy, Op1VK,
>> Op2VK, Op1VP,
>> - Op2VP, Operands);
>> - }
>> int ScalarCost =
>> VecTy->getNumElements() *
>> TTI->getArithmeticInstrCost(S.Opcode, ScalarTy, Op1VK,
>> Op2VK, Op1VP,
>> Op2VP, Operands);
>> int VecCost = TTI->getArithmeticInstrCost(S.Opcode, VecTy,
>> Op1VK, Op2VK,
>> Op1VP, Op2VP,
>> Operands);
>> - return ReuseShuffleCost + VecCost - ScalarCost;
>> + return VecCost - ScalarCost;
>> }
>> case Instruction::GetElementPtr: {
>> TargetTransformInfo::OperandValueKind Op1VK =
>> @@ -2214,46 +2151,31 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> TargetTransformInfo::OperandValueKind Op2VK =
>> TargetTransformInfo::OK_UniformConstantValue;
>> - if (NeedToShuffleReuses) {
>> - ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
>> -
>> TTI->getArithmeticInstrCost(Instruction::Add,
>> - ScalarTy,
>> Op1VK, Op2VK);
>> - }
>> int ScalarCost =
>> VecTy->getNumElements() *
>> TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy,
>> Op1VK, Op2VK);
>> int VecCost =
>> TTI->getArithmeticInstrCost(Instruction::Add, VecTy,
>> Op1VK, Op2VK);
>> - return ReuseShuffleCost + VecCost - ScalarCost;
>> + return VecCost - ScalarCost;
>> }
>> case Instruction::Load: {
>> // Cost of wide load - cost of scalar loads.
>> unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment();
>> - if (NeedToShuffleReuses) {
>> - ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
>> - TTI->getMemoryOpCost(Instruction::Load,
>> ScalarTy,
>> - alignment, 0, VL0);
>> - }
>> int ScalarLdCost = VecTy->getNumElements() *
>> TTI->getMemoryOpCost(Instruction::Load, ScalarTy,
>> alignment, 0, VL0);
>> int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
>> VecTy, alignment, 0, VL0);
>> - return ReuseShuffleCost + VecLdCost - ScalarLdCost;
>> + return VecLdCost - ScalarLdCost;
>> }
>> case Instruction::Store: {
>> // We know that we can merge the stores. Calculate the cost.
>> unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment();
>> - if (NeedToShuffleReuses) {
>> - ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
>> - TTI->getMemoryOpCost(Instruction::Store,
>> ScalarTy,
>> - alignment, 0, VL0);
>> - }
>> int ScalarStCost = VecTy->getNumElements() *
>> TTI->getMemoryOpCost(Instruction::Store, ScalarTy,
>> alignment, 0, VL0);
>> int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
>> VecTy, alignment, 0, VL0);
>> - return ReuseShuffleCost + VecStCost - ScalarStCost;
>> + return VecStCost - ScalarStCost;
>> }
>> case Instruction::Call: {
>> CallInst *CI = cast<CallInst>(VL0);
>> @@ -2268,11 +2190,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
>> FMF = FPMO->getFastMathFlags();
>> - if (NeedToShuffleReuses) {
>> - ReuseShuffleCost -=
>> - (ReuseShuffleNumbers - VL.size()) *
>> - TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
>> - }
>> int ScalarCallCost = VecTy->getNumElements() *
>> TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
>> @@ -2284,7 +2201,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> << " (" << VecCallCost << "-" << ScalarCallCost << ")"
>> << " for " << *CI << "\n");
>> - return ReuseShuffleCost + VecCallCost - ScalarCallCost;
>> + return VecCallCost - ScalarCallCost;
>> }
>> case Instruction::ShuffleVector: {
>> TargetTransformInfo::OperandValueKind Op1VK =
>> @@ -2292,22 +2209,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> TargetTransformInfo::OperandValueKind Op2VK =
>> TargetTransformInfo::OK_AnyValue;
>> int ScalarCost = 0;
>> - if (NeedToShuffleReuses) {
>> - for (unsigned Idx : E->ReuseShuffleIndices) {
>> - Instruction *I = cast<Instruction>(VL[Idx]);
>> - if (!I)
>> - continue;
>> - ReuseShuffleCost -= TTI->getArithmeticInstrCost(
>> - I->getOpcode(), ScalarTy, Op1VK, Op2VK);
>> - }
>> - for (Value *V : VL) {
>> - Instruction *I = cast<Instruction>(V);
>> - if (!I)
>> - continue;
>> - ReuseShuffleCost += TTI->getArithmeticInstrCost(
>> - I->getOpcode(), ScalarTy, Op1VK, Op2VK);
>> - }
>> - }
>> int VecCost = 0;
>> for (Value *i : VL) {
>> Instruction *I = cast<Instruction>(i);
>> @@ -2326,7 +2227,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>> TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy,
>> Op1VK, Op2VK);
>> VecCost +=
>> TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
>> VecTy, 0);
>> - return ReuseShuffleCost + VecCost - ScalarCost;
>> + return VecCost - ScalarCost;
>> }
>> default:
>> llvm_unreachable("Unknown instruction");
>> @@ -2502,14 +2403,10 @@ int BoUpSLP::getTreeCost() {
>> return Cost;
>> }
>> -int BoUpSLP::getGatherCost(Type *Ty,
>> - const DenseSet<unsigned> &ShuffledIndices) {
>> +int BoUpSLP::getGatherCost(Type *Ty) {
>> int Cost = 0;
>> for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements();
>> i < e; ++i)
>> - if (!ShuffledIndices.count(i))
>> - Cost += TTI->getVectorInstrCost(Instruction::InsertElement,
>> Ty, i);
>> - if (!ShuffledIndices.empty())
>> - Cost +=
>> TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
>> + Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
>> return Cost;
>> }
>> @@ -2520,17 +2417,7 @@ int BoUpSLP::getGatherCost(ArrayRef<Valu
>> ScalarTy = SI->getValueOperand()->getType();
>> VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
>> // Find the cost of inserting/extracting values from the vector.
>> - // Check if the same elements are inserted several times and count
>> them as
>> - // shuffle candidates.
>> - DenseSet<unsigned> ShuffledElements;
>> - DenseSet<Value *> UniqueElements;
>> - // Iterate in reverse order to consider insert elements with the
>> high cost.
>> - for (unsigned I = VL.size(); I > 0; --I) {
>> - unsigned Idx = I - 1;
>> - if (!UniqueElements.insert(VL[Idx]).second)
>> - ShuffledElements.insert(Idx);
>> - }
>> - return getGatherCost(VecTy, ShuffledElements);
>> + return getGatherCost(VecTy);
>> }
>> // Reorder commutative operations in alternate shuffle if the
>> resulting vectors
>> @@ -2828,7 +2715,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *>
>> if (TreeEntry *E = getTreeEntry(VL[i])) {
>> // Find which lane we need to extract.
>> int FoundLane = -1;
>> - for (unsigned Lane = 0, LE = E->Scalars.size(); Lane != LE;
>> ++Lane) {
>> + for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) {
>> // Is this the lane of the scalar that we are looking for ?
>> if (E->Scalars[Lane] == VL[i]) {
>> FoundLane = Lane;
>> @@ -2844,6 +2731,14 @@ Value *BoUpSLP::Gather(ArrayRef<Value *>
>> return Vec;
>> }
>> +Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL, Value
>> *OpValue) const {
>> + if (const TreeEntry *En = getTreeEntry(OpValue)) {
>> + if (En->isSame(VL) && En->VectorizedValue)
>> + return En->VectorizedValue;
>> + }
>> + return nullptr;
>> +}
>> +
>> Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
>> InstructionsState S = getSameOpcode(VL);
>> if (S.Opcode) {
>> @@ -2856,38 +2751,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<V
>> Type *ScalarTy = S.OpValue->getType();
>> if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
>> ScalarTy = SI->getValueOperand()->getType();
>> -
>> - // Check that every instruction appears once in this bundle.
>> - SmallVector<unsigned, 4> ReuseShuffleIndicies;
>> - SmallVector<Value *, 4> UniqueValues;
>> - if (VL.size() > 2) {
>> - DenseMap<Value *, unsigned> UniquePositions;
>> - for (Value *V : VL) {
>> - auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
>> - ReuseShuffleIndicies.emplace_back(Res.first->second);
>> - if (Res.second || isa<Constant>(V))
>> - UniqueValues.emplace_back(V);
>> - }
>> - // Do not shuffle single element or if number of unique values
>> is not power
>> - // of 2.
>> - if (UniqueValues.size() == VL.size() || UniqueValues.size() <= 1 ||
>> - !llvm::isPowerOf2_32(UniqueValues.size()))
>> - ReuseShuffleIndicies.clear();
>> - else
>> - VL = UniqueValues;
>> - }
>> VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
>> - Value *V = Gather(VL, VecTy);
>> - if (!ReuseShuffleIndicies.empty()) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - ReuseShuffleIndicies, "shuffle");
>> - if (auto *I = dyn_cast<Instruction>(V)) {
>> - GatherSeq.insert(I);
>> - CSEBlocks.insert(I->getParent());
>> - }
>> - }
>> - return V;
>> + return Gather(VL, VecTy);
>> }
>> Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
>> @@ -2905,19 +2771,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> ScalarTy = SI->getValueOperand()->getType();
>> VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
>> - bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
>> -
>> if (E->NeedToGather) {
>> setInsertPointAfterBundle(E->Scalars, VL0);
>> auto *V = Gather(E->Scalars, VecTy);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices,
>> "shuffle");
>> - if (auto *I = dyn_cast<Instruction>(V)) {
>> - GatherSeq.insert(I);
>> - CSEBlocks.insert(I->getParent());
>> - }
>> - }
>> E->VectorizedValue = V;
>> return V;
>> }
>> @@ -2930,12 +2786,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
>> Builder.SetCurrentDebugLocation(PH->getDebugLoc());
>> PHINode *NewPhi = Builder.CreatePHI(VecTy,
>> PH->getNumIncomingValues());
>> - Value *V = NewPhi;
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices,
>> "shuffle");
>> - }
>> - E->VectorizedValue = V;
>> + E->VectorizedValue = NewPhi;
>> // PHINodes may have multiple entries from the same block.
>> We want to
>> // visit every block once.
>> @@ -2962,30 +2813,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> assert(NewPhi->getNumIncomingValues() ==
>> PH->getNumIncomingValues() &&
>> "Invalid number of incoming values");
>> - return V;
>> + return NewPhi;
>> }
>> case Instruction::ExtractElement: {
>> if (canReuseExtract(E->Scalars, VL0)) {
>> Value *V = VL0->getOperand(0);
>> - if (NeedToShuffleReuses) {
>> - Builder.SetInsertPoint(VL0);
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> E->VectorizedValue = V;
>> return V;
>> }
>> setInsertPointAfterBundle(E->Scalars, VL0);
>> auto *V = Gather(E->Scalars, VecTy);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - if (auto *I = dyn_cast<Instruction>(V)) {
>> - GatherSeq.insert(I);
>> - CSEBlocks.insert(I->getParent());
>> - }
>> - }
>> E->VectorizedValue = V;
>> return V;
>> }
>> @@ -2996,24 +2834,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> PointerType *PtrTy = PointerType::get(VecTy,
>> LI->getPointerAddressSpace());
>> Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
>> LoadInst *V = Builder.CreateAlignedLoad(Ptr,
>> LI->getAlignment());
>> - Value *NewV = propagateMetadata(V, E->Scalars);
>> - if (NeedToShuffleReuses) {
>> - NewV = Builder.CreateShuffleVector(
>> - NewV, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle");
>> - }
>> - E->VectorizedValue = NewV;
>> - return NewV;
>> + E->VectorizedValue = V;
>> + return propagateMetadata(V, E->Scalars);
>> }
>> setInsertPointAfterBundle(E->Scalars, VL0);
>> auto *V = Gather(E->Scalars, VecTy);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - if (auto *I = dyn_cast<Instruction>(V)) {
>> - GatherSeq.insert(I);
>> - CSEBlocks.insert(I->getParent());
>> - }
>> - }
>> E->VectorizedValue = V;
>> return V;
>> }
>> @@ -3037,17 +2862,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Value *InVec = vectorizeTree(INVL);
>> - if (E->VectorizedValue) {
>> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
>> - return E->VectorizedValue;
>> - }
>> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
>> + return V;
>> CastInst *CI = dyn_cast<CastInst>(VL0);
>> Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> E->VectorizedValue = V;
>> ++NumVectorInstructions;
>> return V;
>> @@ -3065,10 +2884,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Value *L = vectorizeTree(LHSV);
>> Value *R = vectorizeTree(RHSV);
>> - if (E->VectorizedValue) {
>> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
>> - return E->VectorizedValue;
>> - }
>> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
>> + return V;
>> CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
>> Value *V;
>> @@ -3077,12 +2894,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> else
>> V = Builder.CreateICmp(P0, L, R);
>> - propagateIRFlags(V, E->Scalars, VL0);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> E->VectorizedValue = V;
>> + propagateIRFlags(E->VectorizedValue, E->Scalars, VL0);
>> ++NumVectorInstructions;
>> return V;
>> }
>> @@ -3100,16 +2913,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Value *True = vectorizeTree(TrueVec);
>> Value *False = vectorizeTree(FalseVec);
>> - if (E->VectorizedValue) {
>> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
>> - return E->VectorizedValue;
>> - }
>> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
>> + return V;
>> Value *V = Builder.CreateSelect(Cond, True, False);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> E->VectorizedValue = V;
>> ++NumVectorInstructions;
>> return V;
>> @@ -3148,24 +2955,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Value *LHS = vectorizeTree(LHSVL);
>> Value *RHS = vectorizeTree(RHSVL);
>> - if (E->VectorizedValue) {
>> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
>> - return E->VectorizedValue;
>> - }
>> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
>> + return V;
>> Value *V = Builder.CreateBinOp(
>> static_cast<Instruction::BinaryOps>(S.Opcode), LHS, RHS);
>> - propagateIRFlags(V, E->Scalars, VL0);
>> - if (auto *I = dyn_cast<Instruction>(V))
>> - V = propagateMetadata(I, E->Scalars);
>> -
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> E->VectorizedValue = V;
>> + propagateIRFlags(E->VectorizedValue, E->Scalars, VL0);
>> ++NumVectorInstructions;
>> + if (Instruction *I = dyn_cast<Instruction>(V))
>> + return propagateMetadata(I, E->Scalars);
>> +
>> return V;
>> }
>> case Instruction::Load: {
>> @@ -3193,14 +2994,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Alignment = DL->getABITypeAlignment(ScalarLoadTy);
>> }
>> LI->setAlignment(Alignment);
>> - Value *V = propagateMetadata(LI, E->Scalars);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> - E->VectorizedValue = V;
>> + E->VectorizedValue = LI;
>> ++NumVectorInstructions;
>> - return V;
>> + return propagateMetadata(LI, E->Scalars);
>> }
>> case Instruction::Store: {
>> StoreInst *SI = cast<StoreInst>(VL0);
>> @@ -3228,14 +3024,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Alignment =
>> DL->getABITypeAlignment(SI->getValueOperand()->getType());
>> S->setAlignment(Alignment);
>> - Value *V = propagateMetadata(S, E->Scalars);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> - E->VectorizedValue = V;
>> + E->VectorizedValue = S;
>> ++NumVectorInstructions;
>> - return V;
>> + return propagateMetadata(S, E->Scalars);
>> }
>> case Instruction::GetElementPtr: {
>> setInsertPointAfterBundle(E->Scalars, VL0);
>> @@ -3259,16 +3050,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Value *V = Builder.CreateGEP(
>> cast<GetElementPtrInst>(VL0)->getSourceElementType(),
>> Op0, OpVecs);
>> - if (Instruction *I = dyn_cast<Instruction>(V))
>> - V = propagateMetadata(I, E->Scalars);
>> -
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> E->VectorizedValue = V;
>> ++NumVectorInstructions;
>> + if (Instruction *I = dyn_cast<Instruction>(V))
>> + return propagateMetadata(I, E->Scalars);
>> +
>> return V;
>> }
>> case Instruction::Call: {
>> @@ -3315,12 +3102,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> if (ScalarArg && getTreeEntry(ScalarArg))
>> ExternalUses.push_back(ExternalUser(ScalarArg,
>> cast<User>(V), 0));
>> - propagateIRFlags(V, E->Scalars, VL0);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> E->VectorizedValue = V;
>> + propagateIRFlags(E->VectorizedValue, E->Scalars, VL0);
>> ++NumVectorInstructions;
>> return V;
>> }
>> @@ -3334,10 +3117,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> Value *LHS = vectorizeTree(LHSVL);
>> Value *RHS = vectorizeTree(RHSVL);
>> - if (E->VectorizedValue) {
>> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
>> - return E->VectorizedValue;
>> - }
>> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
>> + return V;
>> // Create a vector of LHS op1 RHS
>> Value *V0 = Builder.CreateBinOp(
>> @@ -3369,14 +3150,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> propagateIRFlags(V1, OddScalars);
>> Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
>> - if (Instruction *I = dyn_cast<Instruction>(V))
>> - V = propagateMetadata(I, E->Scalars);
>> - if (NeedToShuffleReuses) {
>> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
>> - E->ReuseShuffleIndices, "shuffle");
>> - }
>> E->VectorizedValue = V;
>> ++NumVectorInstructions;
>> + if (Instruction *I = dyn_cast<Instruction>(V))
>> + return propagateMetadata(I, E->Scalars);
>> return V;
>> }
>> @@ -3546,12 +3323,14 @@ void BoUpSLP::optimizeGatherSequence() {
>> DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
>> << " gather sequences instructions.\n");
>> // LICM InsertElementInst sequences.
>> - for (Instruction *I : GatherSeq) {
>> - if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
>> + for (Instruction *it : GatherSeq) {
>> + InsertElementInst *Insert = dyn_cast<InsertElementInst>(it);
>> +
>> + if (!Insert)
>> continue;
>> // Check if this block is inside a loop.
>> - Loop *L = LI->getLoopFor(I->getParent());
>> + Loop *L = LI->getLoopFor(Insert->getParent());
>> if (!L)
>> continue;
>> @@ -3563,15 +3342,15 @@ void BoUpSLP::optimizeGatherSequence() {
>> // If the vector or the element that we insert into it are
>> // instructions that are defined in this basic block then we can't
>> // hoist this instruction.
>> - auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
>> - auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
>> - if (Op0 && L->contains(Op0))
>> + Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
>> + Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
>> + if (CurrVec && L->contains(CurrVec))
>> continue;
>> - if (Op1 && L->contains(Op1))
>> + if (NewElem && L->contains(NewElem))
>> continue;
>> // We can hoist this instruction. Move it to the pre-header.
>> - I->moveBefore(PreHeader->getTerminator());
>> + Insert->moveBefore(PreHeader->getTerminator());
>> }
>> // Make a list of all reachable blocks in our CSE queue.
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll
>> URL:
>> https://nam04.safelinks.protection.outlook.com/?url=http%3A%2F%2Fllvm.org%2Fviewvc%2Fllvm-project%2Fllvm%2Ftrunk%2Ftest%2FTransforms%2FSLPVectorizer%2FX86%2FPR32086.ll%3Frev%3D323447%26r1%3D323446%26r2%3D323447%26view%3Ddiff&data=02%7C01%7C%7Ca0ca24dbbe9544e0eaaa08d5673f6a31%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C636528444413469003&sdata=NKpicN1iP9oEBrVAJEITogQ%2Bcn7UrX9pdzVHFmrLi3s%3D&reserved=0
>> ==============================================================================
>>
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll Thu Jan
>> 25 09:28:12 2018
>> @@ -4,14 +4,15 @@
>> define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
>> ; CHECK-LABEL: @i64_simplified(
>> ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64,
>> i64* [[LD:%.*]], i64 1
>> -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>*
>> -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
>> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
>> +; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8
>> +; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
>> ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64,
>> i64* [[ST:%.*]], i64 1
>> ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64,
>> i64* [[ST]], i64 2
>> ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64,
>> i64* [[ST]], i64 3
>> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST]] to <4 x i64>*
>> -; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
>> +; CHECK-NEXT: store i64 [[T0]], i64* [[ST]], align 8
>> +; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX3]], align 8
>> +; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX4]], align 8
>> +; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX5]], align 8
>> ; CHECK-NEXT: ret void
>> ;
>> %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
>> URL:
>> https://nam04.safelinks.protection.outlook.com/?url=http%3A%2F%2Fllvm.org%2Fviewvc%2Fllvm-project%2Fllvm%2Ftrunk%2Ftest%2FTransforms%2FSLPVectorizer%2FX86%2Fblending-shuffle.ll%3Frev%3D323447%26r1%3D323446%26r2%3D323447%26view%3Ddiff&data=02%7C01%7C%7Ca0ca24dbbe9544e0eaaa08d5673f6a31%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C636528444413469003&sdata=xYjEZJR7IqLGGgau83o%2BfPRrQUi2OKE%2FurY%2Br6ViHZE%3D&reserved=0
>> ==============================================================================
>>
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
>> Thu Jan 25 09:28:12 2018
>> @@ -137,19 +137,17 @@ define i8 @k(<4 x i8> %x) {
>> define i8 @k_bb(<4 x i8> %x) {
>> ; CHECK-LABEL: @k_bb(
>> -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
>> ; CHECK-NEXT: br label [[BB1:%.*]]
>> ; CHECK: bb1:
>> -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
>> -; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
>> -; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
>> -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X]], [[X]]
>> -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X0X0]], [[X3X3]]
>> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
>> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
>> -; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], [[TMP4]]
>> -; CHECK-NEXT: [[TMP6:%.*]] = sdiv i8 [[TMP2]], [[TMP5]]
>> -; CHECK-NEXT: ret i8 [[TMP6]]
>> +; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
>> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
>> +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
>> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> undef, <2 x i32> <i32 3, i32 2>
>> +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
>> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
>> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
>> +; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
>> +; CHECK-NEXT: ret i8 [[TMP8]]
>> ;
>> %x0 = extractelement <4 x i8> %x, i32 0
>> br label %bb1
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll
>> URL:
>> https://nam04.safelinks.protection.outlook.com/?url=http%3A%2F%2Fllvm.org%2Fviewvc%2Fllvm-project%2Fllvm%2Ftrunk%2Ftest%2FTransforms%2FSLPVectorizer%2FX86%2Fhoist.ll%3Frev%3D323447%26r1%3D323446%26r2%3D323447%26view%3Ddiff&data=02%7C01%7C%7Ca0ca24dbbe9544e0eaaa08d5673f6a31%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C636528444413469003&sdata=QTK%2BopMfyMoe7w8tPevAALIb1QjdWXufsTTiPeft%2FXo%3D&reserved=0
>> ==============================================================================
>>
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll Thu Jan 25
>> 09:28:12 2018
>> @@ -16,18 +16,19 @@ target triple = "i386-apple-macosx10.9.0
>> define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
>> ; CHECK-LABEL: @foo(
>> ; CHECK-NEXT: entry:
>> -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[N:%.*]], i32 0
>> -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
>> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
>> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
>> +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
>> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
>> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[K]], i32 3
>> ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
>> ; CHECK: for.body:
>> ; CHECK-NEXT: [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [
>> [[ADD10:%.*]], [[FOR_BODY]] ]
>> ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32,
>> i32* [[A:%.*]], i32 [[I_024]]
>> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
>> -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
>> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[SHUFFLE]], [[TMP3]]
>> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
>> -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
>> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
>> +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
>> +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP5]]
>> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
>> +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
>> ; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4
>> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD10]], 10000
>> ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label
>> [[FOR_END:%.*]]
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> https://nam04.safelinks.protection.outlook.com/?url=http%3A%2F%2Flists.llvm.org%2Fcgi-bin%2Fmailman%2Flistinfo%2Fllvm-commits&data=02%7C01%7C%7Ca0ca24dbbe9544e0eaaa08d5673f6a31%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C636528444413469003&sdata=%2BXn23%2BRRuM6xvAsZ4dF0PW%2FOaJGl4vxTju8FUK2DUPA%3D&reserved=0
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180129/e75a2dd5/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180129/e75a2dd5/attachment-0001.sig>
More information about the llvm-commits
mailing list