[llvm] 455ca0e - [SLP] Allow reordering of vectorization trees with reused instructions.
    Alexey.Bataev via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Sat Sep 19 08:44:27 PDT 2020
    
    
  
Hi Bogdan, yes, this is what I thought! Thanks, I'll investigate it.
-------------
Best regards,
Alexey Bataev
19.09.2020 11:38 AM, Bogdan Graur пишет:
> Hi folks,
>
> I've attached gdb to the running clang process (~1h 40m after it
> started compiling) and got this trace:
>
> (gdb) bt
> #0  0x0000557ce35c98af in
> llvm::slpvectorizer::BoUpSLP::findRootOrder(llvm::SmallVector<unsigned
> int, 4u>&) ()
> #1  0x0000557ce4212a56 in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #2  0x0000557ce4212c2d in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #3  0x0000557ce42120fd in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #4  0x0000557ce4211cc6 in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #5  0x0000557ce4212c50 in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #6  0x0000557ce4211cc6 in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #7  0x0000557ce421341b in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #8  0x0000557ce4212c50 in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #9  0x0000557ce42147ef in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #10 0x0000557ce4212996 in
> llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef<llvm::Value*>,
> unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&) ()
> #11 0x0000557ce420f3ba in
> llvm::slpvectorizer::BoUpSLP::buildTree(llvm::ArrayRef<llvm::Value*>,
> llvm::MapVector<llvm::Value*, llvm::SmallVector<llvm::Instruction*,
> 2u>, llvm::DenseMap<llvm::Value*, unsigned int,
> llvm::DenseMapInfo<llvm::Value*>,
> llvm::detail::DenseMapPair<llvm::Value*, unsigned int> >,
> std::__u::vector<std::__u::pair<llvm::Value*,
> llvm::SmallVector<llvm::Instruction*, 2u> >,
> std::__u::allocator<std::__u::pair<llvm::Value*,
> llvm::SmallVector<llvm::Instruction*, 2u> > > > >&,
> llvm::ArrayRef<llvm::Value*>) ()                                      
>                                                                      
>                                                                      
>                                
> #12 0x0000557ce420afca in
> llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*,
> llvm::Value*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&,
> llvm::TargetTransformInfo*) ()
> #13 0x0000557ce420269d in
> llvm::SLPVectorizerPass::runImpl(llvm::Function&,
> llvm::ScalarEvolution*, llvm::TargetTransformInfo*,
> llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*,
> llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*,
> llvm::OptimizationRemarkEmitter*) ()
> #14 0x0000557ce2e7c7af in
> llvm::SLPVectorizerPass::run(llvm::Function&,
> llvm::AnalysisManager<llvm::Function>&) ()
> #15 0x0000557ce2e7c5b2 in llvm::detail::PassModel<llvm::Function,
> llvm::SLPVectorizerPass, llvm::PreservedAnalyses,
> llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&,
> llvm::AnalysisManager<llvm::Function>&) ()
> #16 0x0000557ce39153c7 in llvm::PassManager<llvm::Function,
> llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&,
> llvm::AnalysisManager<llvm::Function>&) ()
> #17 0x0000557ce44d8c69 in llvm::detail::PassModel<llvm::Module,
> llvm::ModuleToFunctionPassAdaptor<llvm::PassManager<llvm::Function,
> llvm::AnalysisManager<llvm::Function>> >, llvm::PreservedAnalyses,
> llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&,
> llvm::AnalysisManager<llvm::Module>&) ()
> #18 0x0000557ce2fda20b in llvm::PassManager<llvm::Module,
> llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&,
> llvm::AnalysisManager<llvm::Module>&) ()
> #19 0x0000557ce2fda0b2 in llvm::detail::PassModel<llvm::Module,
> llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>,
> llvm::PreservedAnalyses,
> llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&,
> llvm::AnalysisManager<llvm::Module>&) ()
> #20 0x0000557ce2fda20b in llvm::PassManager<llvm::Module,
> llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&,
> llvm::AnalysisManager<llvm::Module>&) ()
> #21 0x0000557ce44b7c08 in (anonymous
> namespace)::EmitAssemblyHelper::EmitAssemblyWithNewPassManager(clang::BackendAction,
> std::__u::unique_ptr<llvm::raw_pwrite_stream,
> std::__u::default_delete<llvm::raw_pwrite_stream> >) ()
> #22 0x0000557ce31bb048 in
> clang::EmitBackendOutput(clang::DiagnosticsEngine&,
> clang::HeaderSearchOptions const&, clang::CodeGenOptions const&,
> clang::TargetOptions const&, clang::LangOptions const&,
> llvm::DataLayout const&, llvm::Module*, clang::BackendAction,
> std::__u::unique_ptr<llvm::raw_pwrite_stream,
> std::__u::default_delete<llvm::raw_pwrite_stream> >) ()
> #23 0x0000557ce31ba7f6 in
> clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) ()
> #24 0x0000557ce3005d80 in clang::ParseAST(clang::Sema&, bool, bool) ()
> #25 0x0000557ce3138515 in clang::FrontendAction::Execute() ()
> #26 0x0000557ce3137b54 in
> clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) ()
> #27 0x0000557ce3136bb0 in
> clang::ExecuteCompilerInvocation(clang::CompilerInstance*) ()
> #28 0x0000557ce312a4ae in cc1_main(llvm::ArrayRef<char const*>, char
> const*, void*) ()
> #29 0x0000557ce312997d in ExecuteCC1Tool(llvm::SmallVectorImpl<char
> const*>&) ()
> #30 0x0000557ce42ab049 in main ()
>
> It looks to me like the control flow is stuck in
> llvm::slpvectorizer::BoUpSLP::findRootOrder(llvm::SmallVector<unsigned
> int, 4u>&) () as I issued a GDB 'finish' command which didn't finish
> after ~5m.
>
> Please let me know if you need more info!
>
> Best,
>
> On Fri, Sep 18, 2020 at 10:47 PM Eric Christopher <echristo at gmail.com
> <mailto:echristo at gmail.com>> wrote:
>
>     Following up here:
>
>     After discussing with Alexey on IRC I've temporarily reverted
>     this. Bogdan was seeing infinite loops in compilation and is going
>     to follow up with a backtrace and a test case later if the
>     backtrace isn't enough.
>
>     Reverted thusly:
>     echristo at athyra ~/s/llvm-project (master)> git push
>     To github.com:llvm/llvm-project.git
>        b168bbfae42..ecfd8161bf4  master -> master
>
>     Thanks a ton Alexey, we'll get back to you asap.
>
>     -eric
>
>
>     On Fri, Sep 18, 2020 at 9:38 AM Alexey Bataev via llvm-commits
>     <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>>
>     wrote:
>
>
>         Author: Alexey Bataev
>         Date: 2020-09-18T09:34:59-04:00
>         New Revision: 455ca0ebb69210046928fedffe292420a30f89ad
>
>         URL:
>         https://github.com/llvm/llvm-project/commit/455ca0ebb69210046928fedffe292420a30f89ad
>         <https://github.com/llvm/llvm-project/commit/455ca0ebb69210046928fedffe292420a30f89ad>
>         DIFF:
>         https://github.com/llvm/llvm-project/commit/455ca0ebb69210046928fedffe292420a30f89ad.diff
>         <https://github.com/llvm/llvm-project/commit/455ca0ebb69210046928fedffe292420a30f89ad.diff>
>
>         LOG: [SLP] Allow reordering of vectorization trees with reused
>         instructions.
>
>         If some leaves have the same instructions to be vectorized, we may
>         incorrectly evaluate the best order for the root node (it is
>         built for the
>         vector of instructions without repeated instructions and,
>         thus, has fewer
>         elements than the root node). In this case we just can not try
>         to reorder
>         the tree + we may calculate the wrong number of nodes that
>         require the
>         same reordering.
>         For example, if the root node is \<a+b, a+c, a+d, f+e\>, then
>         the leaves
>         are \<a, a, a, f\> and \<b, c, d, e\>. When we try to
>         vectorize the first
>         leaf, it will be shrunk to \<a, b\>. If instructions in this
>         leaf should
>         be reordered, the best order will be \<1, 0\>. We need to
>         extend this
>         order for the root node. For the root node this order should
>         look like
>         \<3, 0, 1, 2\>. This patch allows extension of the orders of
>         the nodes
>         with the reused instructions.
>
>         Reviewed By: RKSimon
>
>         Differential Revision: https://reviews.llvm.org/D45263
>         <https://reviews.llvm.org/D45263>
>
>         Added:
>
>
>         Modified:
>             llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>             llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
>             llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>            
>         llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>
>         Removed:
>
>
>
>         ################################################################################
>         diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>         b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>         index c487301177c1..e4cad01e958a 100644
>         --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>         +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>         @@ -523,6 +523,15 @@ static bool isSimple(Instruction *I) {
>
>          namespace llvm {
>
>         +static void inversePermutation(ArrayRef<unsigned> Indices,
>         +                               SmallVectorImpl<int> &Mask) {
>         +  Mask.clear();
>         +  const unsigned E = Indices.size();
>         +  Mask.resize(E, E + 1);
>         +  for (unsigned I = 0; I < E; ++I)
>         +    Mask[Indices[I]] = I;
>         +}
>         +
>          namespace slpvectorizer {
>
>          /// Bottom Up SLP Vectorizer.
>         @@ -537,6 +546,7 @@ class BoUpSLP {
>            using StoreList = SmallVector<StoreInst *, 8>;
>            using ExtraValueToDebugLocsMap =
>                MapVector<Value *, SmallVector<Instruction *, 2>>;
>         +  using OrdersType = SmallVector<unsigned, 4>;
>
>            BoUpSLP(Function *Func, ScalarEvolution *Se,
>         TargetTransformInfo *Tti,
>                    TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
>         @@ -614,6 +624,14 @@ class BoUpSLP {
>
>            /// \returns The best order of instructions for vectorization.
>            Optional<ArrayRef<unsigned>> bestOrder() const {
>         +    assert(llvm::all_of(
>         +               NumOpsWantToKeepOrder,
>         +               [this](const
>         decltype(NumOpsWantToKeepOrder)::value_type &D) {
>         +                 return D.getFirst().size() ==
>         +                        VectorizableTree[0]->Scalars.size();
>         +               }) &&
>         +           "All orders must have the same size as number of
>         instructions in "
>         +           "tree node.");
>              auto I = std::max_element(
>                  NumOpsWantToKeepOrder.begin(),
>         NumOpsWantToKeepOrder.end(),
>                  [](const decltype(NumOpsWantToKeepOrder)::value_type &D1,
>         @@ -627,6 +645,79 @@ class BoUpSLP {
>              return makeArrayRef(I->getFirst());
>            }
>
>         +  /// Builds the correct order for root instructions.
>         +  /// If some leaves have the same instructions to be
>         vectorized, we may
>         +  /// incorrectly evaluate the best order for the root node
>         (it is built for the
>         +  /// vector of instructions without repeated instructions
>         and, thus, has less
>         +  /// elements than the root node). This function builds the
>         correct order for
>         +  /// the root node.
>         +  /// For example, if the root node is \<a+b, a+c, a+d,
>         f+e\>, then the leaves
>         +  /// are \<a, a, a, f\> and \<b, c, d, e\>. When we try to
>         vectorize the first
>         +  /// leaf, it will be shrink to \<a, b\>. If instructions in
>         this leaf should
>         +  /// be reordered, the best order will be \<1, 0\>. We need
>         to extend this
>         +  /// order for the root node. For the root node this order
>         should look like
>         +  /// \<3, 0, 1, 2\>. This function extends the order for the
>         reused
>         +  /// instructions.
>         +  void findRootOrder(OrdersType &Order) {
>         +    // If the leaf has the same number of instructions to
>         vectorize as the root
>         +    // - order must be set already.
>         +    unsigned RootSize = VectorizableTree[0]->Scalars.size();
>         +    if (Order.size() == RootSize)
>         +      return;
>         +    SmallVector<unsigned, 4> RealOrder(Order.size());
>         +    std::swap(Order, RealOrder);
>         +    SmallVector<int, 4> Mask;
>         +    inversePermutation(RealOrder, Mask);
>         +    for (int I = 0, E = Mask.size(); I < E; ++I)
>         +      Order[I] = Mask[I];
>         +    // The leaf has less number of instructions - need to
>         find the true order of
>         +    // the root.
>         +    // Scan the nodes starting from the leaf back to the root.
>         +    const TreeEntry *PNode = VectorizableTree.back().get();
>         +    while (PNode) {
>         +      const TreeEntry &Node = *PNode;
>         +      PNode = Node.UserTreeIndices.back().UserTE;
>         +      if (Node.ReuseShuffleIndices.empty())
>         +        continue;
>         +      // Build the order for the parent node.
>         +      OrdersType NewOrder(Node.ReuseShuffleIndices.size(),
>         RootSize);
>         +      SmallVector<unsigned, 4> OrderCounter(Order.size(), 0);
>         +      // The algorithm of the order extension is:
>         +      // 1. Calculate the number of the same instructions for
>         the order.
>         +      // 2. Calculate the index of the new order: total
>         number of instructions
>         +      // with order less than the order of the current
>         instruction + reuse
>         +      // number of the current instruction.
>         +      // 3. The new order is just the index of the
>         instruction in the original
>         +      // vector of the instructions.
>         +      for (unsigned I : Node.ReuseShuffleIndices)
>         +        ++OrderCounter[Order[I]];
>         +      SmallVector<unsigned, 4> CurrentCounter(Order.size(), 0);
>         +      for (unsigned I = 0, E =
>         Node.ReuseShuffleIndices.size(); I < E; ++I) {
>         +        unsigned ReusedIdx = Node.ReuseShuffleIndices[I];
>         +        unsigned OrderIdx = Order[ReusedIdx];
>         +        unsigned NewIdx = 0;
>         +        for (unsigned J = 0; J < OrderIdx; ++J)
>         +          NewIdx += OrderCounter[J];
>         +        NewIdx += CurrentCounter[OrderIdx];
>         +        ++CurrentCounter[OrderIdx];
>         +        assert(NewOrder[NewIdx] == RootSize &&
>         +               "The order index should not be written already.");
>         +        NewOrder[NewIdx] = I;
>         +      }
>         +      std::swap(Order, NewOrder);
>         +      // If the size of the order is the same as number of
>         instructions in the
>         +      // root node, no need to extend it more.
>         +      if (Order.size() == RootSize)
>         +        break;
>         +    }
>         +    assert((!PNode || Order.size() == RootSize) &&
>         +           "Root node is expected or the size of the order
>         must be the same as "
>         +           "the number of elements in the root node.");
>         +    assert(llvm::all_of(Order,
>         +                        [RootSize](unsigned Val) { return Val
>         != RootSize; }) &&
>         +           "All indices must be initialized");
>         +  }
>         +
>            /// \return The vector element size in bits to use when
>         vectorizing the
>            /// expression tree ending at \p V. If V is a store, the
>         size is the width of
>            /// the stored value. Otherwise, the size is the width of
>         the largest loaded
>         @@ -1467,7 +1558,7 @@ class BoUpSLP {
>              SmallVector<int, 4> ReuseShuffleIndices;
>
>              /// Does this entry require reordering?
>         -    ArrayRef<unsigned> ReorderIndices;
>         +    SmallVector<unsigned, 4> ReorderIndices;
>
>              /// Points back to the VectorizableTree.
>              ///
>         @@ -1660,7 +1751,7 @@ class BoUpSLP {
>              Last->State = Vectorized ? TreeEntry::Vectorize :
>         TreeEntry::NeedToGather;
>              Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
>                                               ReuseShuffleIndices.end());
>         -    Last->ReorderIndices = ReorderIndices;
>         +    Last->ReorderIndices.append(ReorderIndices.begin(),
>         ReorderIndices.end());
>              Last->setOperations(S);
>              if (Vectorized) {
>                for (int i = 0, e = VL.size(); i != e; ++i) {
>         @@ -2197,7 +2288,6 @@ class BoUpSLP {
>            /// List of users to ignore during scheduling and that
>         don't need extracting.
>            ArrayRef<Value *> UserIgnoreList;
>
>         -  using OrdersType = SmallVector<unsigned, 4>;
>            /// A DenseMapInfo implementation for holding DenseMaps and
>         DenseSets of
>            /// sorted SmallVectors of unsigned.
>            struct OrdersTypeDenseMapInfo {
>         @@ -2659,12 +2749,10 @@ void
>         BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
>                  });
>                  // Insert new order with initial value 0, if it does
>         not exist,
>                  // otherwise return the iterator to the existing one.
>         -        auto StoredCurrentOrderAndNum =
>         -           
>         NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
>         -        ++StoredCurrentOrderAndNum->getSecond();
>                  newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
>         -                     ReuseShuffleIndicies,
>         -                     StoredCurrentOrderAndNum->getFirst());
>         +                     ReuseShuffleIndicies, CurrentOrder);
>         +        findRootOrder(CurrentOrder);
>         +        ++NumOpsWantToKeepOrder[CurrentOrder];
>                  // This is a special case, as it does not gather, but
>         at the same time
>                  // we are not extending buildTree_rec() towards the
>         operands.
>                  ValueList Op0;
>         @@ -2741,13 +2829,13 @@ void
>         BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
>                      LLVM_DEBUG(dbgs() << "SLP: added a vector of
>         loads.\n");
>                    } else {
>                      // Need to reorder.
>         -            auto I =
>         NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
>         -            ++I->getSecond();
>                      TreeEntry *TE =
>                          newTreeEntry(VL, Bundle /*vectorized*/, S,
>         UserTreeIdx,
>         -                             ReuseShuffleIndicies,
>         I->getFirst());
>         +                             ReuseShuffleIndicies, CurrentOrder);
>                      TE->setOperandsInOrder();
>                      LLVM_DEBUG(dbgs() << "SLP: added a vector of
>         jumbled loads.\n");
>         +            findRootOrder(CurrentOrder);
>         +            ++NumOpsWantToKeepOrder[CurrentOrder];
>                    }
>                    return;
>                  }
>         @@ -3003,15 +3091,14 @@ void
>         BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
>                      buildTree_rec(Operands, Depth + 1, {TE, 0});
>                      LLVM_DEBUG(dbgs() << "SLP: added a vector of
>         stores.\n");
>                    } else {
>         -            // Need to reorder.
>         -            auto I =
>         NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
>         -            ++(I->getSecond());
>                      TreeEntry *TE =
>                          newTreeEntry(VL, Bundle /*vectorized*/, S,
>         UserTreeIdx,
>         -                             ReuseShuffleIndicies,
>         I->getFirst());
>         +                             ReuseShuffleIndicies, CurrentOrder);
>                      TE->setOperandsInOrder();
>                      buildTree_rec(Operands, Depth + 1, {TE, 0});
>                      LLVM_DEBUG(dbgs() << "SLP: added a vector of
>         jumbled stores.\n");
>         +            findRootOrder(CurrentOrder);
>         +            ++NumOpsWantToKeepOrder[CurrentOrder];
>                    }
>                    return;
>                  }
>         @@ -4141,15 +4228,6 @@ Value
>         *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
>            return V;
>          }
>
>         -static void inversePermutation(ArrayRef<unsigned> Indices,
>         -                               SmallVectorImpl<int> &Mask) {
>         -  Mask.clear();
>         -  const unsigned E = Indices.size();
>         -  Mask.resize(E);
>         -  for (unsigned I = 0; I < E; ++I)
>         -    Mask[Indices[I]] = I;
>         -}
>         -
>          Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
>            IRBuilder<>::InsertPointGuard Guard(Builder);
>
>         @@ -6873,8 +6951,10 @@ class HorizontalReduction {
>                ArrayRef<Value *> VL = makeArrayRef(&ReducedVals[i],
>         ReduxWidth);
>                V.buildTree(VL, ExternallyUsedValues, IgnoreList);
>                Optional<ArrayRef<unsigned>> Order = V.bestOrder();
>         -      // TODO: Handle orders of size less than number of
>         elements in the vector.
>         -      if (Order && Order->size() == VL.size()) {
>         +      if (Order) {
>         +        assert(Order->size() == VL.size() &&
>         +               "Order size must be the same as number of
>         vectorized "
>         +               "instructions.");
>                  // TODO: reorder tree nodes without tree rebuilding.
>                  SmallVector<Value *, 4> ReorderedOps(VL.size());
>                  llvm::transform(*Order, ReorderedOps.begin(),
>
>         diff  --git
>         a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
>         b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
>         index 8b12b9272c7e..a84b1f7e4fcd 100644
>         ---
>         a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
>         +++
>         b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
>         @@ -11,7 +11,7 @@
>          @h = common dso_local global float 0.000000e+00, align 4
>
>          define dso_local void @j() local_unnamed_addr {
>         -; CHECK-LABEL: define {{[^@]+}}@j(
>         +; CHECK-LABEL: @j(
>          ; CHECK-NEXT:  entry:
>          ; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @b, align 8
>          ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds
>         i32, i32* [[TMP0]], i64 4
>         @@ -19,42 +19,39 @@ define dso_local void @j()
>         local_unnamed_addr {
>          ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds
>         i32, i32* [[TMP0]], i64 5
>          ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to
>         <2 x i32>*
>          ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>*
>         [[TMP1]], align 4
>         -; CHECK-NEXT:    [[REORDER_SHUFFLE1:%.*]] = shufflevector <2
>         x i32> [[TMP2]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
>          ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds
>         i32, i32* [[TMP0]], i64 13
>          ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX1]] to
>         <2 x i32>*
>          ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, <2 x i32>*
>         [[TMP3]], align 4
>         -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x
>         i32> [[TMP4]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
>         -; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32>
>         [[REORDER_SHUFFLE]], [[REORDER_SHUFFLE1]]
>         +; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]],
>         [[TMP2]]
>          ; CHECK-NEXT:    [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to
>         <2 x float>
>          ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x float> [[TMP6]],
>         <float 1.000000e+01, float 1.000000e+01>
>         -; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x float> <float
>         0.000000e+00, float 1.000000e+00>, [[TMP7]]
>         -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float>
>         [[TMP8]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0,
>         i32 1>
>         -; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x float>
>         [[SHUFFLE]], i32 1
>         +; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x float> <float
>         1.000000e+00, float 0.000000e+00>, [[TMP7]]
>         +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float>
>         [[TMP8]], <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 1,
>         i32 1>
>         +; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x float>
>         [[SHUFFLE]], i32 0
>          ; CHECK-NEXT:    store float [[TMP9]], float* @g, align 4
>         -; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float>
>         [[SHUFFLE]], <float -1.000000e+00, float -1.000000e+00, float
>         1.000000e+00, float 1.000000e+00>
>         -; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float>
>         [[TMP10]], i32 2
>         +; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float>
>         [[SHUFFLE]], <float -1.000000e+00, float 1.000000e+00, float
>         -1.000000e+00, float 1.000000e+00>
>         +; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float>
>         [[TMP10]], i32 3
>          ; CHECK-NEXT:    store float [[TMP11]], float* @c, align 4
>         -; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float>
>         [[TMP10]], i32 0
>         +; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float>
>         [[TMP10]], i32 2
>          ; CHECK-NEXT:    store float [[TMP12]], float* @d, align 4
>         -; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float>
>         [[TMP10]], i32 3
>         +; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float>
>         [[TMP10]], i32 1
>          ; CHECK-NEXT:    store float [[TMP13]], float* @e, align 4
>         -; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float>
>         [[TMP10]], i32 1
>         +; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float>
>         [[TMP10]], i32 0
>          ; CHECK-NEXT:    store float [[TMP14]], float* @f, align 4
>          ; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds
>         i32, i32* [[TMP0]], i64 14
>          ; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds
>         i32, i32* [[TMP0]], i64 15
>          ; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* @a, align 4
>          ; CHECK-NEXT:    [[CONV19:%.*]] = sitofp i32 [[TMP15]] to float
>         -; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x float>
>         undef, float [[CONV19]], i32 0
>         -; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float>
>         [[TMP16]], float -1.000000e+00, i32 1
>         -; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x float>
>         [[SHUFFLE]], i32 0
>         -; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x float>
>         [[TMP17]], float [[TMP18]], i32 2
>         -; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x float>
>         [[TMP19]], float -1.000000e+00, i32 3
>         -; CHECK-NEXT:    [[TMP21:%.*]] = fsub <4 x float> [[TMP10]],
>         [[TMP20]]
>         -; CHECK-NEXT:    [[TMP22:%.*]] = fadd <4 x float> [[TMP10]],
>         [[TMP20]]
>         -; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x float>
>         [[TMP21]], <4 x float> [[TMP22]], <4 x i32> <i32 0, i32 5, i32
>         2, i32 7>
>         -; CHECK-NEXT:    [[TMP24:%.*]] = fptosi <4 x float> [[TMP23]]
>         to <4 x i32>
>         -; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i32* [[ARRAYIDX1]]
>         to <4 x i32>*
>         -; CHECK-NEXT:    store <4 x i32> [[TMP24]], <4 x i32>*
>         [[TMP25]], align 4
>         +; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x float>
>         <float -1.000000e+00, float -1.000000e+00, float undef, float
>         undef>, float [[CONV19]], i32 2
>         +; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x float>
>         [[SHUFFLE]], i32 2
>         +; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x float>
>         [[TMP16]], float [[TMP17]], i32 3
>         +; CHECK-NEXT:    [[TMP19:%.*]] = fadd <4 x float> [[TMP10]],
>         [[TMP18]]
>         +; CHECK-NEXT:    [[TMP20:%.*]] = fsub <4 x float> [[TMP10]],
>         [[TMP18]]
>         +; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x float>
>         [[TMP19]], <4 x float> [[TMP20]], <4 x i32> <i32 0, i32 1, i32
>         6, i32 7>
>         +; CHECK-NEXT:    [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]]
>         to <4 x i32>
>         +; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x
>         i32> [[TMP22]], <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32
>         3, i32 1>
>         +; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]]
>         to <4 x i32>*
>         +; CHECK-NEXT:    store <4 x i32> [[REORDER_SHUFFLE]], <4 x
>         i32>* [[TMP23]], align 4
>          ; CHECK-NEXT:    ret void
>          ;
>          entry:
>
>         diff  --git
>         a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>         b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>         index 384e540efb79..9ed21a1c3f8c 100644
>         ---
>         a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>         +++
>         b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>         @@ -14,11 +14,10 @@ define void @hoge() {
>          ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16>
>         undef, i16 [[T]], i32 0
>          ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16>
>         [[TMP0]], i16 undef, i32 1
>          ; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2
>         x i32>
>         -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x
>         i32> [[TMP2]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
>         -; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32> <i32 63,
>         i32 undef>, [[REORDER_SHUFFLE]]
>         +; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32> <i32 undef,
>         i32 63>, [[TMP2]]
>          ; CHECK-NEXT:    [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
>         -; CHECK-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <2 x i32>
>         [[TMP4]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
>         -; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE5]],
>         <i32 undef, i32 15, i32 31, i32 47>
>         +; CHECK-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <2 x i32>
>         [[TMP4]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
>         +; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE5]],
>         <i32 15, i32 31, i32 47, i32 undef>
>          ; CHECK-NEXT:    [[TMP6:%.*]] = call i32
>         @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
>          ; CHECK-NEXT:    [[T19:%.*]] = select i1 undef, i32 [[TMP6]],
>         i32 undef
>          ; CHECK-NEXT:    [[T20:%.*]] = icmp sgt i32 [[T19]], 63
>
>         diff  --git
>         a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>         b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>         index b7cff2dac5d4..02e7c5b37f3e 100644
>         ---
>         a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>         +++
>         b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>         @@ -7,16 +7,15 @@ define i32 @foo(i32* nocapture readonly
>         %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
>          ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds
>         i32, i32* [[ARR:%.*]], i64 1
>          ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR]] to <2 x
>         i32>*
>          ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>*
>         [[TMP0]], align 4
>         -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x
>         i32> [[TMP1]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
>         -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32>
>         [[REORDER_SHUFFLE]], <2 x i32> undef, <8 x i32> <i32 0, i32 0,
>         i32 0, i32 0, i32 0, i32 0, i32 1, i32 1>
>         -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32>
>         undef, i32 [[A1:%.*]], i32 0
>         -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32>
>         [[TMP2]], i32 [[A2:%.*]], i32 1
>         -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32>
>         [[TMP3]], i32 [[A3:%.*]], i32 2
>         -; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32>
>         [[TMP4]], i32 [[A4:%.*]], i32 3
>         -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32>
>         [[TMP5]], i32 [[A5:%.*]], i32 4
>         -; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32>
>         [[TMP6]], i32 [[A6:%.*]], i32 5
>         -; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32>
>         [[TMP7]], i32 [[A7:%.*]], i32 6
>         -; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32>
>         [[TMP8]], i32 [[A8:%.*]], i32 7
>         +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32>
>         [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 1, i32
>         1, i32 1, i32 1, i32 1, i32 1>
>         +; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32>
>         undef, i32 [[A7:%.*]], i32 0
>         +; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32>
>         [[TMP2]], i32 [[A8:%.*]], i32 1
>         +; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32>
>         [[TMP3]], i32 [[A1:%.*]], i32 2
>         +; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32>
>         [[TMP4]], i32 [[A2:%.*]], i32 3
>         +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32>
>         [[TMP5]], i32 [[A3:%.*]], i32 4
>         +; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32>
>         [[TMP6]], i32 [[A4:%.*]], i32 5
>         +; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32>
>         [[TMP7]], i32 [[A5:%.*]], i32 6
>         +; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32>
>         [[TMP8]], i32 [[A6:%.*]], i32 7
>          ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]],
>         [[TMP9]]
>          ; CHECK-NEXT:    [[TMP11:%.*]] = call i32
>         @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
>          ; CHECK-NEXT:    ret i32 [[TMP11]]
>         @@ -58,16 +57,15 @@ define i32 @foo1(i32* nocapture readonly
>         %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
>          ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds
>         i32, i32* [[ARR]], i64 3
>          ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x
>         i32>*
>          ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>*
>         [[TMP0]], align 4
>         -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x
>         i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32
>         3, i32 0>
>         -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32>
>         [[REORDER_SHUFFLE]], <4 x i32> undef, <8 x i32> <i32 0, i32 1,
>         i32 2, i32 0, i32 0, i32 3, i32 1, i32 0>
>         -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32>
>         undef, i32 [[A1:%.*]], i32 0
>         -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32>
>         [[TMP2]], i32 [[A2:%.*]], i32 1
>         -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32>
>         [[TMP3]], i32 [[A3:%.*]], i32 2
>         -; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32>
>         [[TMP4]], i32 [[A4:%.*]], i32 3
>         -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32>
>         [[TMP5]], i32 [[A5:%.*]], i32 4
>         -; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32>
>         [[TMP6]], i32 [[A6:%.*]], i32 5
>         +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32>
>         [[TMP1]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32
>         1, i32 1, i32 2, i32 2, i32 3>
>         +; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32>
>         undef, i32 [[A6:%.*]], i32 0
>         +; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32>
>         [[TMP2]], i32 [[A1:%.*]], i32 1
>         +; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32>
>         [[TMP3]], i32 [[A4:%.*]], i32 2
>         +; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32>
>         [[TMP4]], i32 [[A5:%.*]], i32 3
>         +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32>
>         [[TMP5]], i32 [[A8:%.*]], i32 4
>         +; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32>
>         [[TMP6]], i32 [[A2:%.*]], i32 5
>          ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32>
>         [[TMP7]], i32 [[A7:%.*]], i32 6
>         -; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32>
>         [[TMP8]], i32 [[A8:%.*]], i32 7
>         +; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32>
>         [[TMP8]], i32 [[A3:%.*]], i32 7
>          ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]],
>         [[TMP9]]
>          ; CHECK-NEXT:    [[TMP11:%.*]] = call i32
>         @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
>          ; CHECK-NEXT:    ret i32 [[TMP11]]
>         @@ -113,16 +111,15 @@ define i32 @foo2(i32* nocapture readonly
>         %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
>          ; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds
>         i32, i32* [[ARR]], i64 1
>          ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x
>         i32>*
>          ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>*
>         [[TMP0]], align 4
>         -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x
>         i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32
>         0, i32 1>
>         -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32>
>         [[REORDER_SHUFFLE]], <4 x i32> undef, <8 x i32> <i32 0, i32 1,
>         i32 0, i32 2, i32 3, i32 2, i32 1, i32 3>
>         -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32>
>         undef, i32 [[A1:%.*]], i32 0
>         -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32>
>         [[TMP2]], i32 [[A2:%.*]], i32 1
>         -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32>
>         [[TMP3]], i32 [[A3:%.*]], i32 2
>         -; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32>
>         [[TMP4]], i32 [[A4:%.*]], i32 3
>         -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32>
>         [[TMP5]], i32 [[A5:%.*]], i32 4
>         -; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32>
>         [[TMP6]], i32 [[A6:%.*]], i32 5
>         -; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32>
>         [[TMP7]], i32 [[A7:%.*]], i32 6
>         -; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32>
>         [[TMP8]], i32 [[A8:%.*]], i32 7
>         +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32>
>         [[TMP1]], <4 x i32> undef, <8 x i32> <i32 0, i32 0, i32 1, i32
>         1, i32 2, i32 2, i32 3, i32 3>
>         +; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32>
>         undef, i32 [[A4:%.*]], i32 0
>         +; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32>
>         [[TMP2]], i32 [[A6:%.*]], i32 1
>         +; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32>
>         [[TMP3]], i32 [[A5:%.*]], i32 2
>         +; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32>
>         [[TMP4]], i32 [[A8:%.*]], i32 3
>         +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32>
>         [[TMP5]], i32 [[A2:%.*]], i32 4
>         +; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32>
>         [[TMP6]], i32 [[A7:%.*]], i32 5
>         +; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32>
>         [[TMP7]], i32 [[A1:%.*]], i32 6
>         +; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32>
>         [[TMP8]], i32 [[A3:%.*]], i32 7
>          ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]],
>         [[TMP9]]
>          ; CHECK-NEXT:    [[TMP11:%.*]] = call i32
>         @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
>          ; CHECK-NEXT:    ret i32 [[TMP11]]
>
>
>
>         _______________________________________________
>         llvm-commits mailing list
>         llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>         https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>         <https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
>
>
>
> -- 
>
>
> Bogdan Graur | 	 Software Engineer | 	 bgraur at google.com
> <mailto:bgraur at google.com> | 	 
>
>
> Google Germany GmbH
>
> Erika-Mann-Str. 33
> D-80636 Munich
>
> Geschäftsführer: Paul Manicle, Halimah DeLaine Prado
>
> Registergericht und -nummer: Hamburg, HRB 86891
>
> Sitz der Gesellschaft: Hamburg
>
>
> Diese E-Mail ist vertraulich. Wenn Sie nicht der richtige Adressat
> sind, leiten Sie diese bitte nicht weiter, informieren Sie den
> Absender und löschen Sie die E-Mail und alle Anhänge. Vielen Dank.
>
>       
>
> This e-mail is confidential. If you are not the right addressee please
> do not forward it, please inform the sender, and please erase this
> e-mail including any attachments. Thanks.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200919/82186e75/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_0xFEE42AA7A747E6CE.asc
Type: application/pgp-keys
Size: 3844 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200919/82186e75/attachment-0001.key>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature
Type: application/pgp-signature
Size: 840 bytes
Desc: OpenPGP digital signature
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200919/82186e75/attachment-0001.sig>
    
    
More information about the llvm-commits mailing list