[llvm] f06e332 - Revert "[SLP]Improve/fix reordering of the gathered graph nodes."

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 27 11:27:36 PDT 2021


Reminder: When reverting, please provide enough information so that 
others following along know what happened, and ideally, how to 
reproduce.  A link to e.g. a buildbot is encouraged.

Philip

On 10/27/21 11:17 AM, Alexey Bataev via llvm-commits wrote:
> Author: Alexey Bataev
> Date: 2021-10-27T11:16:58-07:00
> New Revision: f06e33298266b46985ebd6612cde587c27e1ef1a
>
> URL: https://github.com/llvm/llvm-project/commit/f06e33298266b46985ebd6612cde587c27e1ef1a
> DIFF: https://github.com/llvm/llvm-project/commit/f06e33298266b46985ebd6612cde587c27e1ef1a.diff
>
> LOG: Revert "[SLP]Improve/fix reordering of the gathered graph nodes."
>
> This reverts commit 64d1617d18cb8b6f9511d0eda481fc5a5d0ebddf to fix test
> non-stability.
>
> Added:
>      
>
> Modified:
>      llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>      llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
>      llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
>      llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
>      llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
>
> Removed:
>      
>
>
> ################################################################################
> diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> index 174aeab5e448a..63bc1faed0dee 100644
> --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> @@ -766,12 +766,6 @@ class BoUpSLP {
>     /// Perform LICM and CSE on the newly generated gather sequences.
>     void optimizeGatherSequence();
>   
> -  /// Checks if the specified gather tree entry \p TE can be represented as a
> -  /// shuffled vector entry + (possibly) permutation with other gathers. It
> -  /// implements the checks only for possibly ordered scalars (Loads,
> -  /// ExtractElement, ExtractValue), which can be part of the graph.
> -  Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
> -
>     /// Reorders the current graph to the most profitable order starting from the
>     /// root node to the leaf nodes. The best order is chosen only from the nodes
>     /// of the same size (vectorization factor). Smaller nodes are considered
> @@ -2676,72 +2670,6 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
>     fixupOrderingIndices(Order);
>   }
>   
> -Optional<BoUpSLP::OrdersType>
> -BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
> -  assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
> -  unsigned NumScalars = TE.Scalars.size();
> -  OrdersType CurrentOrder(NumScalars, NumScalars);
> -  SmallVector<int> Positions;
> -  SmallBitVector UsedPositions(NumScalars);
> -  const TreeEntry *STE = nullptr;
> -  // Try to find all gathered scalars that are gets vectorized in other
> -  // vectorize node. Here we can have only one single tree vector node to
> -  // correctly identify order of the gathered scalars.
> -  for (unsigned I = 0; I < NumScalars; ++I) {
> -    Value *V = TE.Scalars[I];
> -    if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
> -      continue;
> -    if (const auto *LocalSTE = getTreeEntry(V)) {
> -      if (!STE)
> -        STE = LocalSTE;
> -      else if (STE != LocalSTE)
> -        // Take the order only from the single vector node.
> -        return None;
> -      unsigned Lane =
> -          std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
> -      if (Lane >= NumScalars)
> -        return None;
> -      if (CurrentOrder[Lane] != NumScalars) {
> -        if (Lane != I)
> -          continue;
> -        UsedPositions.reset(CurrentOrder[Lane]);
> -      }
> -      // The partial identity (where only some elements of the gather node are
> -      // in the identity order) is good.
> -      CurrentOrder[Lane] = I;
> -      UsedPositions.set(I);
> -    }
> -  }
> -  // Need to keep the order if we have a vector entry and at least 2 scalars or
> -  // the vectorized entry has just 2 scalars.
> -  if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
> -    auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
> -      for (unsigned I = 0; I < NumScalars; ++I)
> -        if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
> -          return false;
> -      return true;
> -    };
> -    if (IsIdentityOrder(CurrentOrder)) {
> -      CurrentOrder.clear();
> -      return CurrentOrder;
> -    }
> -    auto *It = CurrentOrder.begin();
> -    for (unsigned I = 0; I < NumScalars;) {
> -      if (UsedPositions.test(I)) {
> -        ++I;
> -        continue;
> -      }
> -      if (*It == NumScalars) {
> -        *It = I;
> -        ++I;
> -      }
> -      ++It;
> -    }
> -    return CurrentOrder;
> -  }
> -  return None;
> -}
> -
>   void BoUpSLP::reorderTopToBottom() {
>     // Maps VF to the graph nodes.
>     DenseMap<unsigned, SmallPtrSet<TreeEntry *, 4>> VFToOrderedEntries;
> @@ -2761,29 +2689,19 @@ void BoUpSLP::reorderTopToBottom() {
>               InsertElementInst>(TE->getMainOp()) &&
>           !TE->isAltShuffle()) {
>         VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
> -      return;
> -    }
> -    if (TE->State == TreeEntry::NeedToGather) {
> -      if (TE->getOpcode() == Instruction::ExtractElement &&
> -          !TE->isAltShuffle() &&
> -          isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
> -                                   ->getVectorOperandType()) &&
> -          allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
> -        // Check that gather of extractelements can be represented as
> -        // just a shuffle of a single vector.
> -        OrdersType CurrentOrder;
> -        bool Reuse =
> -            canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
> -        if (Reuse || !CurrentOrder.empty()) {
> -          VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
> -          GathersToOrders.try_emplace(TE.get(), CurrentOrder);
> -          return;
> -        }
> -      }
> -      if (Optional<OrdersType> CurrentOrder =
> -              findReusedOrderedScalars(*TE.get())) {
> +    } else if (TE->State == TreeEntry::NeedToGather &&
> +               TE->getOpcode() == Instruction::ExtractElement &&
> +               !TE->isAltShuffle() &&
> +               isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
> +                                        ->getVectorOperandType()) &&
> +               allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
> +      // Check that gather of extractelements can be represented as
> +      // just a shuffle of a single vector.
> +      OrdersType CurrentOrder;
> +      bool Reuse = canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
> +      if (Reuse || !CurrentOrder.empty()) {
>           VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
> -        GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
> +        GathersToOrders.try_emplace(TE.get(), CurrentOrder);
>         }
>       }
>     });
> @@ -2835,7 +2753,7 @@ void BoUpSLP::reorderTopToBottom() {
>       // Choose the most used order.
>       ArrayRef<unsigned> BestOrder = OrdersUses.begin()->first;
>       unsigned Cnt = OrdersUses.begin()->second;
> -    for (const auto &Pair : drop_begin(OrdersUses)) {
> +    for (const auto &Pair : llvm::drop_begin(OrdersUses)) {
>         if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
>           BestOrder = Pair.first;
>           Cnt = Pair.second;
> @@ -2912,8 +2830,6 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
>     for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
>                                 &NonVectorized](
>                                    const std::unique_ptr<TreeEntry> &TE) {
> -    if (TE->State != TreeEntry::Vectorize)
> -      NonVectorized.push_back(TE.get());
>       // No need to reorder if need to shuffle reuses, still need to shuffle the
>       // node.
>       if (!TE->ReuseShuffleIndices.empty())
> @@ -2922,37 +2838,28 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
>           isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp()) &&
>           !TE->isAltShuffle()) {
>         OrderedEntries.insert(TE.get());
> -      return;
> -    }
> -    if (TE->State == TreeEntry::NeedToGather) {
> -      if (TE->getOpcode() == Instruction::ExtractElement &&
> -          !TE->isAltShuffle() &&
> -          isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
> -                                   ->getVectorOperandType()) &&
> -          allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
> -        // Check that gather of extractelements can be represented as
> -        // just a shuffle of a single vector with a single user only.
> -        OrdersType CurrentOrder;
> -        bool Reuse =
> -            canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
> -        if ((Reuse || !CurrentOrder.empty()) &&
> -            !any_of(VectorizableTree,
> -                    [&TE](const std::unique_ptr<TreeEntry> &Entry) {
> -                      return Entry->State == TreeEntry::NeedToGather &&
> -                             Entry.get() != TE.get() &&
> -                             Entry->isSame(TE->Scalars);
> -                    })) {
> -          OrderedEntries.insert(TE.get());
> -          GathersToOrders.try_emplace(TE.get(), CurrentOrder);
> -          return;
> -        }
> -      }
> -      if (Optional<OrdersType> CurrentOrder =
> -              findReusedOrderedScalars(*TE.get())) {
> +    } else if (TE->State == TreeEntry::NeedToGather &&
> +               TE->getOpcode() == Instruction::ExtractElement &&
> +               !TE->isAltShuffle() &&
> +               isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
> +                                        ->getVectorOperandType()) &&
> +               allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
> +      // Check that gather of extractelements can be represented as
> +      // just a shuffle of a single vector with a single user only.
> +      OrdersType CurrentOrder;
> +      bool Reuse = canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
> +      if ((Reuse || !CurrentOrder.empty()) &&
> +          !any_of(
> +              VectorizableTree, [&TE](const std::unique_ptr<TreeEntry> &Entry) {
> +                return Entry->State == TreeEntry::NeedToGather &&
> +                       Entry.get() != TE.get() && Entry->isSame(TE->Scalars);
> +              })) {
>           OrderedEntries.insert(TE.get());
> -        GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
> +        GathersToOrders.try_emplace(TE.get(), CurrentOrder);
>         }
>       }
> +    if (TE->State != TreeEntry::Vectorize)
> +      NonVectorized.push_back(TE.get());
>     });
>   
>     // Checks if the operands of the users are reordarable and have only single
> @@ -3004,7 +2911,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
>       for (TreeEntry *TE : OrderedEntries) {
>         if (!(TE->State == TreeEntry::Vectorize ||
>               (TE->State == TreeEntry::NeedToGather &&
> -             GathersToOrders.count(TE))) ||
> +             TE->getOpcode() == Instruction::ExtractElement)) ||
>             TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
>             !all_of(drop_begin(TE->UserTreeIndices),
>                     [TE](const EdgeInfo &EI) {
> @@ -3082,7 +2989,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
>         // Choose the best order.
>         ArrayRef<unsigned> BestOrder = OrdersUses.begin()->first;
>         unsigned Cnt = OrdersUses.begin()->second;
> -      for (const auto &Pair : drop_begin(OrdersUses)) {
> +      for (const auto &Pair : llvm::drop_begin(OrdersUses)) {
>           if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
>             BestOrder = Pair.first;
>             Cnt = Pair.second;
> @@ -3125,13 +3032,10 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
>         }
>         // For gathers just need to reorder its scalars.
>         for (TreeEntry *Gather : GatherOps) {
> +        if (!Gather->ReuseShuffleIndices.empty())
> +          continue;
>           assert(Gather->ReorderIndices.empty() &&
>                  "Unexpected reordering of gathers.");
> -        if (!Gather->ReuseShuffleIndices.empty()) {
> -          // Just reorder reuses indices.
> -          reorderReuses(Gather->ReuseShuffleIndices, Mask);
> -          continue;
> -        }
>           reorderScalars(Gather->Scalars, Mask);
>           OrderedEntries.remove(Gather);
>         }
> @@ -7465,7 +7369,9 @@ struct SLPVectorizer : public FunctionPass {
>       initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
>     }
>   
> -  bool doInitialization(Module &M) override { return false; }
> +  bool doInitialization(Module &M) override {
> +    return false;
> +  }
>   
>     bool runOnFunction(Function &F) override {
>       if (skipFunction(F))
>
> diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
> index 16fd83f4b2ec2..96b143cfc4527 100644
> --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
> +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
> @@ -32,19 +32,21 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
>   
>   define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
>   ; CHECK-LABEL: @store_chain_v2i64(
> -; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
> -; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
> -; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
> -; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
> -; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
> -; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP9:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
> -; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[TMP10]], [[TMP7]]
> -; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
> -; CHECK-NEXT:    store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 8
> +; CHECK-NEXT:    [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
> +; CHECK-NEXT:    [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
> +; CHECK-NEXT:    [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
> +; CHECK-NEXT:    [[V0_0:%.*]] = load i64, i64* [[A]], align 8
> +; CHECK-NEXT:    [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
> +; CHECK-NEXT:    [[V1_0:%.*]] = load i64, i64* [[B]], align 8
> +; CHECK-NEXT:    [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
> +; CHECK-NEXT:    [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
> +; CHECK-NEXT:    [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
> +; CHECK-NEXT:    [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
> +; CHECK-NEXT:    [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
> +; CHECK-NEXT:    [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
> +; CHECK-NEXT:    [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
> +; CHECK-NEXT:    store i64 [[TMP2_0]], i64* [[C]], align 8
> +; CHECK-NEXT:    store i64 [[TMP2_1]], i64* [[C_1]], align 8
>   ; CHECK-NEXT:    ret void
>   ;
>     %a.0 = getelementptr i64, i64* %a, i64 0
>
> diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
> index b4ed8604e2f85..34b32f2265521 100644
> --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
> +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
> @@ -32,19 +32,21 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
>   
>   define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
>   ; CHECK-LABEL: @store_chain_v2i64(
> -; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
> -; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
> -; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
> -; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
> -; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
> -; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP9:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
> -; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[TMP10]], [[TMP7]]
> -; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
> -; CHECK-NEXT:    store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 8
> +; CHECK-NEXT:    [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
> +; CHECK-NEXT:    [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
> +; CHECK-NEXT:    [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
> +; CHECK-NEXT:    [[V0_0:%.*]] = load i64, i64* [[A]], align 8
> +; CHECK-NEXT:    [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
> +; CHECK-NEXT:    [[V1_0:%.*]] = load i64, i64* [[B]], align 8
> +; CHECK-NEXT:    [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
> +; CHECK-NEXT:    [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
> +; CHECK-NEXT:    [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
> +; CHECK-NEXT:    [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
> +; CHECK-NEXT:    [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
> +; CHECK-NEXT:    [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
> +; CHECK-NEXT:    [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
> +; CHECK-NEXT:    store i64 [[TMP2_0]], i64* [[C]], align 8
> +; CHECK-NEXT:    store i64 [[TMP2_1]], i64* [[C_1]], align 8
>   ; CHECK-NEXT:    ret void
>   ;
>     %a.0 = getelementptr i64, i64* %a, i64 0
>
> diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
> index e1c38e398150a..11e313bdbe6fb 100644
> --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
> +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
> @@ -69,23 +69,22 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca
>   ; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
>   ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
>   ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
> -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
> -; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
>   ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
> -; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2
> +; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
>   ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
> -; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
> +; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
>   ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2
> -; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
> +; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
>   ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3
> -; CHECK-NEXT:    [[TMP11:%.*]] = mul <4 x i32> [[SHUFFLE]], [[TMP10]]
> +; CHECK-NEXT:    [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]]
>   ; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
>   ; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
>   ; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
>   ; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
> -; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
> +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
>   ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
> -; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP12]], align 4
> +; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4
>   ; CHECK-NEXT:    ret i32 undef
>   ;
>     %in.addr = getelementptr inbounds i32, i32* %in, i64 0
>
> diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
> index 96502d44acee4..623fb602279d7 100644
> --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
> +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
> @@ -600,18 +600,21 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
>   ; CHECK-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
>   ; CHECK-NEXT:    [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
>   ; CHECK-NEXT:    [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
> -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0
> -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1
> -; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
> -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
> -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
> -; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
> -; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]]
> -; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], [[SHUFFLE]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0
> +; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1
> +; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
> +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
> +; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
> +; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
> +; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]]
> +; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
> +; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1
> +; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]]
> +; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]]
>   ; CHECK-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
>   ; CHECK-NEXT:    [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
> -; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
> -; CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
> +; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
> +; CHECK-NEXT:    store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
>   ; CHECK-NEXT:    ret void
>   ;
>     %idx0 = getelementptr inbounds double, double* %array, i64 0
>
>
>          
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list