[llvm] r323447 - Revert "[SLP] Fix for PR32086: Count InsertElementInstr of the same elements as shuffle."
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 29 09:40:38 PST 2018
Alexey,
It is customary, when reverting a patch, to reply to the commit thread
stating that the patch has been reverted and in which commit. It is also
customary to include a brief summary of the bugs fixed when
resubmitting. This makes it easier for the broader community to follow
along and see the progress being made.
Philip
On 01/25/2018 09:28 AM, Alexey Bataev via llvm-commits wrote:
> Author: abataev
> Date: Thu Jan 25 09:28:12 2018
> New Revision: 323447
>
> URL: http://llvm.org/viewvc/llvm-project?rev=323447&view=rev
> Log:
> Revert "[SLP] Fix for PR32086: Count InsertElementInstr of the same elements as shuffle."
>
> This reverts commit r323441 to fix buildbots.
>
> Modified:
> llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll
> llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
> llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=323447&r1=323446&r2=323447&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Thu Jan 25 09:28:12 2018
> @@ -662,9 +662,13 @@ private:
> /// Vectorize a single entry in the tree, starting in \p VL.
> Value *vectorizeTree(ArrayRef<Value *> VL);
>
> + /// \returns the pointer to the vectorized value if \p VL is already
> + /// vectorized, or NULL. They may happen in cycles.
> + Value *alreadyVectorized(ArrayRef<Value *> VL, Value *OpValue) const;
> +
> /// \returns the scalarization cost for this type. Scalarization in this
> /// context means the creation of vectors from a group of scalars.
> - int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices);
> + int getGatherCost(Type *Ty);
>
> /// \returns the scalarization cost for this list of values. Assuming that
> /// this subtree gets vectorized, we may need to extract the values from the
> @@ -698,12 +702,8 @@ private:
>
> /// \returns true if the scalars in VL are equal to this entry.
> bool isSame(ArrayRef<Value *> VL) const {
> - if (VL.size() == Scalars.size())
> - return std::equal(VL.begin(), VL.end(), Scalars.begin());
> - return VL.size() == ReuseShuffleIndices.size() &&
> - std::equal(
> - VL.begin(), VL.end(), ReuseShuffleIndices.begin(),
> - [this](Value *V, unsigned Idx) { return V == Scalars[Idx]; });
> + assert(VL.size() == Scalars.size() && "Invalid size");
> + return std::equal(VL.begin(), VL.end(), Scalars.begin());
> }
>
> /// A vector of scalars.
> @@ -715,9 +715,6 @@ private:
> /// Do we need to gather this sequence ?
> bool NeedToGather = false;
>
> - /// Does this sequence require some shuffling?
> - SmallVector<unsigned, 4> ReuseShuffleIndices;
> -
> /// Points back to the VectorizableTree.
> ///
> /// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
> @@ -732,15 +729,13 @@ private:
> };
>
> /// Create a new VectorizableTree entry.
> - void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int &UserTreeIdx,
> - ArrayRef<unsigned> ReuseShuffleIndices = None) {
> + TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
> + int &UserTreeIdx) {
> VectorizableTree.emplace_back(VectorizableTree);
> int idx = VectorizableTree.size() - 1;
> TreeEntry *Last = &VectorizableTree[idx];
> Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
> Last->NeedToGather = !Vectorized;
> - Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
> - ReuseShuffleIndices.end());
> if (Vectorized) {
> for (int i = 0, e = VL.size(); i != e; ++i) {
> assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
> @@ -753,6 +748,7 @@ private:
> if (UserTreeIdx >= 0)
> Last->UserTreeIndices.push_back(UserTreeIdx);
> UserTreeIdx = idx;
> + return Last;
> }
>
> /// -- Vectorization State --
> @@ -766,6 +762,13 @@ private:
> return nullptr;
> }
>
> + const TreeEntry *getTreeEntry(Value *V) const {
> + auto I = ScalarToTreeEntry.find(V);
> + if (I != ScalarToTreeEntry.end())
> + return &VectorizableTree[I->second];
> + return nullptr;
> + }
> +
> /// Maps a specific scalar to its tree entry.
> SmallDenseMap<Value*, int> ScalarToTreeEntry;
>
> @@ -1429,11 +1432,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>
> // Check if this is a duplicate of another entry.
> if (TreeEntry *E = getTreeEntry(S.OpValue)) {
> - DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
> - if (!E->isSame(VL)) {
> - DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
> - newTreeEntry(VL, false, UserTreeIdx);
> - return;
> + for (unsigned i = 0, e = VL.size(); i != e; ++i) {
> + DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
> + if (E->Scalars[i] != VL[i]) {
> + DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
> + newTreeEntry(VL, false, UserTreeIdx);
> + return;
> + }
> }
> // Record the reuse of the tree node. FIXME, currently this is only used to
> // properly draw the graph rather than for the actual vectorization.
> @@ -1479,26 +1484,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> }
>
> // Check that every instruction appears once in this bundle.
> - SmallVector<unsigned, 4> ReuseShuffleIndicies;
> - SmallVector<Value *, 4> UniqueValues;
> - DenseMap<Value *, unsigned> UniquePositions;
> - for (Value *V : VL) {
> - auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
> - ReuseShuffleIndicies.emplace_back(Res.first->second);
> - if (Res.second)
> - UniqueValues.emplace_back(V);
> - }
> - if (UniqueValues.size() == VL.size()) {
> - ReuseShuffleIndicies.clear();
> - } else {
> - DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
> - if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) {
> - DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
> - newTreeEntry(VL, false, UserTreeIdx);
> - return;
> - }
> - VL = UniqueValues;
> - }
> + for (unsigned i = 0, e = VL.size(); i < e; ++i)
> + for (unsigned j = i + 1; j < e; ++j)
> + if (VL[i] == VL[j]) {
> + DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
> + newTreeEntry(VL, false, UserTreeIdx);
> + return;
> + }
>
> auto &BSRef = BlocksSchedules[BB];
> if (!BSRef)
> @@ -1506,12 +1498,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>
> BlockScheduling &BS = *BSRef.get();
>
> - if (!BS.tryScheduleBundle(VL, this, VL0)) {
> + if (!BS.tryScheduleBundle(VL, this, S.OpValue)) {
> DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
> assert((!BS.getScheduleData(VL0) ||
> !BS.getScheduleData(VL0)->isPartOfBundle()) &&
> "tryScheduleBundle should cancelScheduling on failure");
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> return;
> }
> DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
> @@ -1530,12 +1522,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> if (Term) {
> DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> return;
> }
> }
>
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
>
> for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
> @@ -1553,7 +1545,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> case Instruction::ExtractElement: {
> bool Reuse = canReuseExtract(VL, VL0);
> if (Reuse) {
> - DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
> + DEBUG(dbgs() << "SLP: Reusing extract sequence.\n");
> ++NumOpsWantToKeepOrder[S.Opcode];
> } else {
> SmallVector<Value *, 4> ReverseVL(VL.rbegin(), VL.rend());
> @@ -1561,7 +1553,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> --NumOpsWantToKeepOrder[S.Opcode];
> BS.cancelScheduling(VL, VL0);
> }
> - newTreeEntry(VL, Reuse, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, Reuse, UserTreeIdx);
> return;
> }
> case Instruction::Load: {
> @@ -1576,7 +1568,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> if (DL->getTypeSizeInBits(ScalarTy) !=
> DL->getTypeAllocSizeInBits(ScalarTy)) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
> return;
> }
> @@ -1587,7 +1579,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> LoadInst *L = cast<LoadInst>(VL[i]);
> if (!L->isSimple()) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
> return;
> }
> @@ -1609,7 +1601,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>
> if (Consecutive) {
> ++NumOpsWantToKeepOrder[S.Opcode];
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> DEBUG(dbgs() << "SLP: added a vector of loads.\n");
> return;
> }
> @@ -1624,7 +1616,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> }
>
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
>
> if (ReverseConsecutive) {
> --NumOpsWantToKeepOrder[S.Opcode];
> @@ -1651,12 +1643,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
> if (Ty != SrcTy || !isValidElementType(Ty)) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
> return;
> }
> }
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> DEBUG(dbgs() << "SLP: added a vector of casts.\n");
>
> for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
> @@ -1679,13 +1671,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> if (Cmp->getPredicate() != P0 ||
> Cmp->getOperand(0)->getType() != ComparedTy) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
> return;
> }
> }
>
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> DEBUG(dbgs() << "SLP: added a vector of compares.\n");
>
> for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
> @@ -1717,7 +1709,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> case Instruction::And:
> case Instruction::Or:
> case Instruction::Xor:
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
>
> // Sort operands of the instructions so that each side is more likely to
> @@ -1746,7 +1738,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
> DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> return;
> }
> }
> @@ -1759,7 +1751,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> if (Ty0 != CurTy) {
> DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> return;
> }
> }
> @@ -1771,12 +1763,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> DEBUG(
> dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> return;
> }
> }
>
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
> for (unsigned i = 0, e = 2; i < e; ++i) {
> ValueList Operands;
> @@ -1793,12 +1785,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
> if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
> return;
> }
>
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> DEBUG(dbgs() << "SLP: added a vector of stores.\n");
>
> ValueList Operands;
> @@ -1816,7 +1808,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
> if (!isTriviallyVectorizable(ID)) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
> return;
> }
> @@ -1830,7 +1822,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
> !CI->hasIdenticalOperandBundleSchema(*CI2)) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
> << "\n");
> return;
> @@ -1841,7 +1833,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> Value *A1J = CI2->getArgOperand(1);
> if (A1I != A1J) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
> << " argument "<< A1I<<"!=" << A1J
> << "\n");
> @@ -1854,14 +1846,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> CI->op_begin() + CI->getBundleOperandsEndIndex(),
> CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!="
> << *VL[i] << '\n');
> return;
> }
> }
>
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
> ValueList Operands;
> // Prepare the operand vector.
> @@ -1878,11 +1870,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> // then do not vectorize this instruction.
> if (!S.IsAltShuffle) {
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
> return;
> }
> - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, true, UserTreeIdx);
> DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
>
> // Reorder operands if reordering would enable vectorization.
> @@ -1906,7 +1898,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>
> default:
> BS.cancelScheduling(VL, VL0);
> - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
> + newTreeEntry(VL, false, UserTreeIdx);
> DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
> return;
> }
> @@ -1999,22 +1991,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> VecTy = VectorType::get(
> IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
>
> - unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
> - bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
> - int ReuseShuffleCost = 0;
> - if (NeedToShuffleReuses) {
> - ReuseShuffleCost =
> - TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
> - }
> if (E->NeedToGather) {
> if (allConstant(VL))
> return 0;
> if (isSplat(VL)) {
> - return ReuseShuffleCost +
> - TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
> + return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
> }
> - if (getSameOpcode(VL).Opcode == Instruction::ExtractElement &&
> - allSameType(VL) && allSameBlock(VL)) {
> + if (getSameOpcode(VL).Opcode == Instruction::ExtractElement) {
> Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
> if (ShuffleKind.hasValue()) {
> int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
> @@ -2031,10 +2014,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> IO->getZExtValue());
> }
> }
> - return ReuseShuffleCost + Cost;
> + return Cost;
> }
> }
> - return ReuseShuffleCost + getGatherCost(VL);
> + return getGatherCost(E->Scalars);
> }
> InstructionsState S = getSameOpcode(VL);
> assert(S.Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
> @@ -2047,36 +2030,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
>
> case Instruction::ExtractValue:
> case Instruction::ExtractElement:
> - if (NeedToShuffleReuses) {
> - unsigned Idx = 0;
> - for (unsigned I : E->ReuseShuffleIndices) {
> - if (ShuffleOrOp == Instruction::ExtractElement) {
> - auto *IO = cast<ConstantInt>(
> - cast<ExtractElementInst>(VL[I])->getIndexOperand());
> - Idx = IO->getZExtValue();
> - ReuseShuffleCost -= TTI->getVectorInstrCost(
> - Instruction::ExtractElement, VecTy, Idx);
> - } else {
> - ReuseShuffleCost -= TTI->getVectorInstrCost(
> - Instruction::ExtractElement, VecTy, Idx);
> - ++Idx;
> - }
> - }
> - Idx = ReuseShuffleNumbers;
> - for (Value *V : VL) {
> - if (ShuffleOrOp == Instruction::ExtractElement) {
> - auto *IO = cast<ConstantInt>(
> - cast<ExtractElementInst>(V)->getIndexOperand());
> - Idx = IO->getZExtValue();
> - } else {
> - --Idx;
> - }
> - ReuseShuffleCost +=
> - TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx);
> - }
> - }
> if (canReuseExtract(VL, S.OpValue)) {
> - int DeadCost = ReuseShuffleCost;
> + int DeadCost = 0;
> for (unsigned i = 0, e = VL.size(); i < e; ++i) {
> Instruction *E = cast<Instruction>(VL[i]);
> // If all users are going to be vectorized, instruction can be
> @@ -2084,12 +2039,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> // The same, if have only one user, it will be vectorized for sure.
> if (areAllUsersVectorized(E))
> // Take credit for instruction that will become dead.
> - DeadCost -=
> + DeadCost +=
> TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
> }
> - return DeadCost;
> + return -DeadCost;
> }
> - return ReuseShuffleCost + getGatherCost(VL);
> + return getGatherCost(VecTy);
>
> case Instruction::ZExt:
> case Instruction::SExt:
> @@ -2104,11 +2059,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> case Instruction::FPTrunc:
> case Instruction::BitCast: {
> Type *SrcTy = VL0->getOperand(0)->getType();
> - if (NeedToShuffleReuses) {
> - ReuseShuffleCost -=
> - (ReuseShuffleNumbers - VL.size()) *
> - TTI->getCastInstrCost(S.Opcode, ScalarTy, SrcTy, VL0);
> - }
>
> // Calculate the cost of this instruction.
> int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
> @@ -2117,26 +2067,19 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
> int VecCost = 0;
> // Check if the values are candidates to demote.
> - if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
> - VecCost = ReuseShuffleCost +
> - TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0);
> - }
> + if (!MinBWs.count(VL0) || VecTy != SrcVecTy)
> + VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0);
> return VecCost - ScalarCost;
> }
> case Instruction::FCmp:
> case Instruction::ICmp:
> case Instruction::Select: {
> // Calculate the cost of this instruction.
> - if (NeedToShuffleReuses) {
> - ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
> - TTI->getCmpSelInstrCost(S.Opcode, ScalarTy,
> - Builder.getInt1Ty(), VL0);
> - }
> VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
> int ScalarCost = VecTy->getNumElements() *
> TTI->getCmpSelInstrCost(S.Opcode, ScalarTy, Builder.getInt1Ty(), VL0);
> int VecCost = TTI->getCmpSelInstrCost(S.Opcode, VecTy, MaskTy, VL0);
> - return ReuseShuffleCost + VecCost - ScalarCost;
> + return VecCost - ScalarCost;
> }
> case Instruction::Add:
> case Instruction::FAdd:
> @@ -2194,19 +2137,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> Op2VP = TargetTransformInfo::OP_PowerOf2;
>
> SmallVector<const Value *, 4> Operands(VL0->operand_values());
> - if (NeedToShuffleReuses) {
> - ReuseShuffleCost -=
> - (ReuseShuffleNumbers - VL.size()) *
> - TTI->getArithmeticInstrCost(S.Opcode, ScalarTy, Op1VK, Op2VK, Op1VP,
> - Op2VP, Operands);
> - }
> int ScalarCost =
> VecTy->getNumElements() *
> TTI->getArithmeticInstrCost(S.Opcode, ScalarTy, Op1VK, Op2VK, Op1VP,
> Op2VP, Operands);
> int VecCost = TTI->getArithmeticInstrCost(S.Opcode, VecTy, Op1VK, Op2VK,
> Op1VP, Op2VP, Operands);
> - return ReuseShuffleCost + VecCost - ScalarCost;
> + return VecCost - ScalarCost;
> }
> case Instruction::GetElementPtr: {
> TargetTransformInfo::OperandValueKind Op1VK =
> @@ -2214,46 +2151,31 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> TargetTransformInfo::OperandValueKind Op2VK =
> TargetTransformInfo::OK_UniformConstantValue;
>
> - if (NeedToShuffleReuses) {
> - ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
> - TTI->getArithmeticInstrCost(Instruction::Add,
> - ScalarTy, Op1VK, Op2VK);
> - }
> int ScalarCost =
> VecTy->getNumElements() *
> TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
> int VecCost =
> TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
>
> - return ReuseShuffleCost + VecCost - ScalarCost;
> + return VecCost - ScalarCost;
> }
> case Instruction::Load: {
> // Cost of wide load - cost of scalar loads.
> unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment();
> - if (NeedToShuffleReuses) {
> - ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
> - TTI->getMemoryOpCost(Instruction::Load, ScalarTy,
> - alignment, 0, VL0);
> - }
> int ScalarLdCost = VecTy->getNumElements() *
> TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
> int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
> VecTy, alignment, 0, VL0);
> - return ReuseShuffleCost + VecLdCost - ScalarLdCost;
> + return VecLdCost - ScalarLdCost;
> }
> case Instruction::Store: {
> // We know that we can merge the stores. Calculate the cost.
> unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment();
> - if (NeedToShuffleReuses) {
> - ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
> - TTI->getMemoryOpCost(Instruction::Store, ScalarTy,
> - alignment, 0, VL0);
> - }
> int ScalarStCost = VecTy->getNumElements() *
> TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
> int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
> VecTy, alignment, 0, VL0);
> - return ReuseShuffleCost + VecStCost - ScalarStCost;
> + return VecStCost - ScalarStCost;
> }
> case Instruction::Call: {
> CallInst *CI = cast<CallInst>(VL0);
> @@ -2268,11 +2190,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
> FMF = FPMO->getFastMathFlags();
>
> - if (NeedToShuffleReuses) {
> - ReuseShuffleCost -=
> - (ReuseShuffleNumbers - VL.size()) *
> - TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
> - }
> int ScalarCallCost = VecTy->getNumElements() *
> TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
>
> @@ -2284,7 +2201,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> << " (" << VecCallCost << "-" << ScalarCallCost << ")"
> << " for " << *CI << "\n");
>
> - return ReuseShuffleCost + VecCallCost - ScalarCallCost;
> + return VecCallCost - ScalarCallCost;
> }
> case Instruction::ShuffleVector: {
> TargetTransformInfo::OperandValueKind Op1VK =
> @@ -2292,22 +2209,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> TargetTransformInfo::OperandValueKind Op2VK =
> TargetTransformInfo::OK_AnyValue;
> int ScalarCost = 0;
> - if (NeedToShuffleReuses) {
> - for (unsigned Idx : E->ReuseShuffleIndices) {
> - Instruction *I = cast<Instruction>(VL[Idx]);
> - if (!I)
> - continue;
> - ReuseShuffleCost -= TTI->getArithmeticInstrCost(
> - I->getOpcode(), ScalarTy, Op1VK, Op2VK);
> - }
> - for (Value *V : VL) {
> - Instruction *I = cast<Instruction>(V);
> - if (!I)
> - continue;
> - ReuseShuffleCost += TTI->getArithmeticInstrCost(
> - I->getOpcode(), ScalarTy, Op1VK, Op2VK);
> - }
> - }
> int VecCost = 0;
> for (Value *i : VL) {
> Instruction *I = cast<Instruction>(i);
> @@ -2326,7 +2227,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
> TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK);
> VecCost +=
> TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0);
> - return ReuseShuffleCost + VecCost - ScalarCost;
> + return VecCost - ScalarCost;
> }
> default:
> llvm_unreachable("Unknown instruction");
> @@ -2502,14 +2403,10 @@ int BoUpSLP::getTreeCost() {
> return Cost;
> }
>
> -int BoUpSLP::getGatherCost(Type *Ty,
> - const DenseSet<unsigned> &ShuffledIndices) {
> +int BoUpSLP::getGatherCost(Type *Ty) {
> int Cost = 0;
> for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
> - if (!ShuffledIndices.count(i))
> - Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
> - if (!ShuffledIndices.empty())
> - Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
> + Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
> return Cost;
> }
>
> @@ -2520,17 +2417,7 @@ int BoUpSLP::getGatherCost(ArrayRef<Valu
> ScalarTy = SI->getValueOperand()->getType();
> VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
> // Find the cost of inserting/extracting values from the vector.
> - // Check if the same elements are inserted several times and count them as
> - // shuffle candidates.
> - DenseSet<unsigned> ShuffledElements;
> - DenseSet<Value *> UniqueElements;
> - // Iterate in reverse order to consider insert elements with the high cost.
> - for (unsigned I = VL.size(); I > 0; --I) {
> - unsigned Idx = I - 1;
> - if (!UniqueElements.insert(VL[Idx]).second)
> - ShuffledElements.insert(Idx);
> - }
> - return getGatherCost(VecTy, ShuffledElements);
> + return getGatherCost(VecTy);
> }
>
> // Reorder commutative operations in alternate shuffle if the resulting vectors
> @@ -2828,7 +2715,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *>
> if (TreeEntry *E = getTreeEntry(VL[i])) {
> // Find which lane we need to extract.
> int FoundLane = -1;
> - for (unsigned Lane = 0, LE = E->Scalars.size(); Lane != LE; ++Lane) {
> + for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) {
> // Is this the lane of the scalar that we are looking for ?
> if (E->Scalars[Lane] == VL[i]) {
> FoundLane = Lane;
> @@ -2844,6 +2731,14 @@ Value *BoUpSLP::Gather(ArrayRef<Value *>
> return Vec;
> }
>
> +Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL, Value *OpValue) const {
> + if (const TreeEntry *En = getTreeEntry(OpValue)) {
> + if (En->isSame(VL) && En->VectorizedValue)
> + return En->VectorizedValue;
> + }
> + return nullptr;
> +}
> +
> Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
> InstructionsState S = getSameOpcode(VL);
> if (S.Opcode) {
> @@ -2856,38 +2751,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<V
> Type *ScalarTy = S.OpValue->getType();
> if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
> ScalarTy = SI->getValueOperand()->getType();
> -
> - // Check that every instruction appears once in this bundle.
> - SmallVector<unsigned, 4> ReuseShuffleIndicies;
> - SmallVector<Value *, 4> UniqueValues;
> - if (VL.size() > 2) {
> - DenseMap<Value *, unsigned> UniquePositions;
> - for (Value *V : VL) {
> - auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
> - ReuseShuffleIndicies.emplace_back(Res.first->second);
> - if (Res.second || isa<Constant>(V))
> - UniqueValues.emplace_back(V);
> - }
> - // Do not shuffle single element or if number of unique values is not power
> - // of 2.
> - if (UniqueValues.size() == VL.size() || UniqueValues.size() <= 1 ||
> - !llvm::isPowerOf2_32(UniqueValues.size()))
> - ReuseShuffleIndicies.clear();
> - else
> - VL = UniqueValues;
> - }
> VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
>
> - Value *V = Gather(VL, VecTy);
> - if (!ReuseShuffleIndicies.empty()) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - ReuseShuffleIndicies, "shuffle");
> - if (auto *I = dyn_cast<Instruction>(V)) {
> - GatherSeq.insert(I);
> - CSEBlocks.insert(I->getParent());
> - }
> - }
> - return V;
> + return Gather(VL, VecTy);
> }
>
> Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
> @@ -2905,19 +2771,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> ScalarTy = SI->getValueOperand()->getType();
> VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
>
> - bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
> -
> if (E->NeedToGather) {
> setInsertPointAfterBundle(E->Scalars, VL0);
> auto *V = Gather(E->Scalars, VecTy);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - if (auto *I = dyn_cast<Instruction>(V)) {
> - GatherSeq.insert(I);
> - CSEBlocks.insert(I->getParent());
> - }
> - }
> E->VectorizedValue = V;
> return V;
> }
> @@ -2930,12 +2786,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
> Builder.SetCurrentDebugLocation(PH->getDebugLoc());
> PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
> - Value *V = NewPhi;
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> - E->VectorizedValue = V;
> + E->VectorizedValue = NewPhi;
>
> // PHINodes may have multiple entries from the same block. We want to
> // visit every block once.
> @@ -2962,30 +2813,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>
> assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
> "Invalid number of incoming values");
> - return V;
> + return NewPhi;
> }
>
> case Instruction::ExtractElement: {
> if (canReuseExtract(E->Scalars, VL0)) {
> Value *V = VL0->getOperand(0);
> - if (NeedToShuffleReuses) {
> - Builder.SetInsertPoint(VL0);
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> E->VectorizedValue = V;
> return V;
> }
> setInsertPointAfterBundle(E->Scalars, VL0);
> auto *V = Gather(E->Scalars, VecTy);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - if (auto *I = dyn_cast<Instruction>(V)) {
> - GatherSeq.insert(I);
> - CSEBlocks.insert(I->getParent());
> - }
> - }
> E->VectorizedValue = V;
> return V;
> }
> @@ -2996,24 +2834,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
> Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
> LoadInst *V = Builder.CreateAlignedLoad(Ptr, LI->getAlignment());
> - Value *NewV = propagateMetadata(V, E->Scalars);
> - if (NeedToShuffleReuses) {
> - NewV = Builder.CreateShuffleVector(
> - NewV, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle");
> - }
> - E->VectorizedValue = NewV;
> - return NewV;
> + E->VectorizedValue = V;
> + return propagateMetadata(V, E->Scalars);
> }
> setInsertPointAfterBundle(E->Scalars, VL0);
> auto *V = Gather(E->Scalars, VecTy);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - if (auto *I = dyn_cast<Instruction>(V)) {
> - GatherSeq.insert(I);
> - CSEBlocks.insert(I->getParent());
> - }
> - }
> E->VectorizedValue = V;
> return V;
> }
> @@ -3037,17 +2862,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>
> Value *InVec = vectorizeTree(INVL);
>
> - if (E->VectorizedValue) {
> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
> - return E->VectorizedValue;
> - }
> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
> + return V;
>
> CastInst *CI = dyn_cast<CastInst>(VL0);
> Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> E->VectorizedValue = V;
> ++NumVectorInstructions;
> return V;
> @@ -3065,10 +2884,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> Value *L = vectorizeTree(LHSV);
> Value *R = vectorizeTree(RHSV);
>
> - if (E->VectorizedValue) {
> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
> - return E->VectorizedValue;
> - }
> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
> + return V;
>
> CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
> Value *V;
> @@ -3077,12 +2894,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> else
> V = Builder.CreateICmp(P0, L, R);
>
> - propagateIRFlags(V, E->Scalars, VL0);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> E->VectorizedValue = V;
> + propagateIRFlags(E->VectorizedValue, E->Scalars, VL0);
> ++NumVectorInstructions;
> return V;
> }
> @@ -3100,16 +2913,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> Value *True = vectorizeTree(TrueVec);
> Value *False = vectorizeTree(FalseVec);
>
> - if (E->VectorizedValue) {
> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
> - return E->VectorizedValue;
> - }
> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
> + return V;
>
> Value *V = Builder.CreateSelect(Cond, True, False);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> E->VectorizedValue = V;
> ++NumVectorInstructions;
> return V;
> @@ -3148,24 +2955,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> Value *LHS = vectorizeTree(LHSVL);
> Value *RHS = vectorizeTree(RHSVL);
>
> - if (E->VectorizedValue) {
> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
> - return E->VectorizedValue;
> - }
> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
> + return V;
>
> Value *V = Builder.CreateBinOp(
> static_cast<Instruction::BinaryOps>(S.Opcode), LHS, RHS);
> - propagateIRFlags(V, E->Scalars, VL0);
> - if (auto *I = dyn_cast<Instruction>(V))
> - V = propagateMetadata(I, E->Scalars);
> -
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> E->VectorizedValue = V;
> + propagateIRFlags(E->VectorizedValue, E->Scalars, VL0);
> ++NumVectorInstructions;
>
> + if (Instruction *I = dyn_cast<Instruction>(V))
> + return propagateMetadata(I, E->Scalars);
> +
> return V;
> }
> case Instruction::Load: {
> @@ -3193,14 +2994,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> Alignment = DL->getABITypeAlignment(ScalarLoadTy);
> }
> LI->setAlignment(Alignment);
> - Value *V = propagateMetadata(LI, E->Scalars);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> - E->VectorizedValue = V;
> + E->VectorizedValue = LI;
> ++NumVectorInstructions;
> - return V;
> + return propagateMetadata(LI, E->Scalars);
> }
> case Instruction::Store: {
> StoreInst *SI = cast<StoreInst>(VL0);
> @@ -3228,14 +3024,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
>
> S->setAlignment(Alignment);
> - Value *V = propagateMetadata(S, E->Scalars);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> - E->VectorizedValue = V;
> + E->VectorizedValue = S;
> ++NumVectorInstructions;
> - return V;
> + return propagateMetadata(S, E->Scalars);
> }
> case Instruction::GetElementPtr: {
> setInsertPointAfterBundle(E->Scalars, VL0);
> @@ -3259,16 +3050,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>
> Value *V = Builder.CreateGEP(
> cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
> - if (Instruction *I = dyn_cast<Instruction>(V))
> - V = propagateMetadata(I, E->Scalars);
> -
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> E->VectorizedValue = V;
> ++NumVectorInstructions;
>
> + if (Instruction *I = dyn_cast<Instruction>(V))
> + return propagateMetadata(I, E->Scalars);
> +
> return V;
> }
> case Instruction::Call: {
> @@ -3315,12 +3102,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> if (ScalarArg && getTreeEntry(ScalarArg))
> ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
>
> - propagateIRFlags(V, E->Scalars, VL0);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> E->VectorizedValue = V;
> + propagateIRFlags(E->VectorizedValue, E->Scalars, VL0);
> ++NumVectorInstructions;
> return V;
> }
> @@ -3334,10 +3117,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> Value *LHS = vectorizeTree(LHSVL);
> Value *RHS = vectorizeTree(RHSVL);
>
> - if (E->VectorizedValue) {
> - DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
> - return E->VectorizedValue;
> - }
> + if (Value *V = alreadyVectorized(E->Scalars, VL0))
> + return V;
>
> // Create a vector of LHS op1 RHS
> Value *V0 = Builder.CreateBinOp(
> @@ -3369,14 +3150,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> propagateIRFlags(V1, OddScalars);
>
> Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
> - if (Instruction *I = dyn_cast<Instruction>(V))
> - V = propagateMetadata(I, E->Scalars);
> - if (NeedToShuffleReuses) {
> - V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
> - E->ReuseShuffleIndices, "shuffle");
> - }
> E->VectorizedValue = V;
> ++NumVectorInstructions;
> + if (Instruction *I = dyn_cast<Instruction>(V))
> + return propagateMetadata(I, E->Scalars);
>
> return V;
> }
> @@ -3546,12 +3323,14 @@ void BoUpSLP::optimizeGatherSequence() {
> DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
> << " gather sequences instructions.\n");
> // LICM InsertElementInst sequences.
> - for (Instruction *I : GatherSeq) {
> - if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
> + for (Instruction *it : GatherSeq) {
> + InsertElementInst *Insert = dyn_cast<InsertElementInst>(it);
> +
> + if (!Insert)
> continue;
>
> // Check if this block is inside a loop.
> - Loop *L = LI->getLoopFor(I->getParent());
> + Loop *L = LI->getLoopFor(Insert->getParent());
> if (!L)
> continue;
>
> @@ -3563,15 +3342,15 @@ void BoUpSLP::optimizeGatherSequence() {
> // If the vector or the element that we insert into it are
> // instructions that are defined in this basic block then we can't
> // hoist this instruction.
> - auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
> - auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
> - if (Op0 && L->contains(Op0))
> + Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
> + Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
> + if (CurrVec && L->contains(CurrVec))
> continue;
> - if (Op1 && L->contains(Op1))
> + if (NewElem && L->contains(NewElem))
> continue;
>
> // We can hoist this instruction. Move it to the pre-header.
> - I->moveBefore(PreHeader->getTerminator());
> + Insert->moveBefore(PreHeader->getTerminator());
> }
>
> // Make a list of all reachable blocks in our CSE queue.
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll?rev=323447&r1=323446&r2=323447&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll Thu Jan 25 09:28:12 2018
> @@ -4,14 +4,15 @@
> define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
> ; CHECK-LABEL: @i64_simplified(
> ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
> -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>*
> -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
> +; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8
> +; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
> ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
> ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
> ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST]] to <4 x i64>*
> -; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
> +; CHECK-NEXT: store i64 [[T0]], i64* [[ST]], align 8
> +; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX3]], align 8
> +; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX4]], align 8
> +; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX5]], align 8
> ; CHECK-NEXT: ret void
> ;
> %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll?rev=323447&r1=323446&r2=323447&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll Thu Jan 25 09:28:12 2018
> @@ -137,19 +137,17 @@ define i8 @k(<4 x i8> %x) {
>
> define i8 @k_bb(<4 x i8> %x) {
> ; CHECK-LABEL: @k_bb(
> -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
> ; CHECK-NEXT: br label [[BB1:%.*]]
> ; CHECK: bb1:
> -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
> -; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
> -; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
> -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X]], [[X]]
> -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X0X0]], [[X3X3]]
> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
> -; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], [[TMP4]]
> -; CHECK-NEXT: [[TMP6:%.*]] = sdiv i8 [[TMP2]], [[TMP5]]
> -; CHECK-NEXT: ret i8 [[TMP6]]
> +; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
> +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> undef, <2 x i32> <i32 3, i32 2>
> +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
> +; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
> +; CHECK-NEXT: ret i8 [[TMP8]]
> ;
> %x0 = extractelement <4 x i8> %x, i32 0
> br label %bb1
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll?rev=323447&r1=323446&r2=323447&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/hoist.ll Thu Jan 25 09:28:12 2018
> @@ -16,18 +16,19 @@ target triple = "i386-apple-macosx10.9.0
> define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
> ; CHECK-LABEL: @foo(
> ; CHECK-NEXT: entry:
> -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[N:%.*]], i32 0
> -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
> +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[K]], i32 3
> ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
> ; CHECK: for.body:
> ; CHECK-NEXT: [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ]
> ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]]
> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
> -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[SHUFFLE]], [[TMP3]]
> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
> -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
> +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
> +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP5]]
> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
> +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
> ; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4
> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD10]], 10000
> ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list