<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body>
    <p>Hi Bogdan, yes, this is what I thought! Thanks, I'll investigate
      it. <br>
    </p>
    <pre class="moz-signature" cols="72">-------------
Best regards,
Alexey Bataev</pre>
    <div class="moz-cite-prefix">19.09.2020 11:38 AM, Bogdan Graur
      пишет:<br>
    </div>
    <blockquote type="cite"
cite="mid:CAPpf2i_dkMgT1yYfVBLczP-zzxVUjbjvqMZKVmKAYC9Yq=nnNw@mail.gmail.com">
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
      <div dir="ltr">Hi folks,
        <div><br>
        </div>
        <div>I've attached gdb to the running clang process (~1h 40m
          after it started compiling) and got this trace:</div>
        <div><br>
        </div>
        <div>(gdb) bt<br>
          #0  0x0000557ce35c98af in
llvm::slpvectorizer::BoUpSLP::findRootOrder(llvm::SmallVector&lt;unsigned
          int, 4u&gt;&amp;) ()<br>
          #1  0x0000557ce4212a56 in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #2  0x0000557ce4212c2d in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #3  0x0000557ce42120fd in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #4  0x0000557ce4211cc6 in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #5  0x0000557ce4212c50 in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #6  0x0000557ce4211cc6 in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #7  0x0000557ce421341b in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #8  0x0000557ce4212c50 in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #9  0x0000557ce42147ef in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #10 0x0000557ce4212996 in
llvm::slpvectorizer::BoUpSLP::buildTree_rec(llvm::ArrayRef&lt;llvm::Value*&gt;,
          unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo
          const&amp;) ()<br>
          #11 0x0000557ce420f3ba in
llvm::slpvectorizer::BoUpSLP::buildTree(llvm::ArrayRef&lt;llvm::Value*&gt;,
          llvm::MapVector&lt;llvm::Value*,
          llvm::SmallVector&lt;llvm::Instruction*, 2u&gt;,
          llvm::DenseMap&lt;llvm::Value*, unsigned int,
          llvm::DenseMapInfo&lt;llvm::Value*&gt;,
          llvm::detail::DenseMapPair&lt;llvm::Value*, unsigned int&gt;
          &gt;, std::__u::vector&lt;std::__u::pair&lt;llvm::Value*,
          llvm::SmallVector&lt;llvm::Instruction*, 2u&gt; &gt;,
          std::__u::allocator&lt;std::__u::pair&lt;llvm::Value*,
          llvm::SmallVector&lt;llvm::Instruction*, 2u&gt; &gt; &gt; &gt;
          &gt;&amp;, llvm::ArrayRef&lt;llvm::Value*&gt;) ()<br>
          #12 0x0000557ce420afca in
          llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*,
          llvm::Value*, llvm::BasicBlock*,
          llvm::slpvectorizer::BoUpSLP&amp;, llvm::TargetTransformInfo*)
          ()<br>
          #13 0x0000557ce420269d in
          llvm::SLPVectorizerPass::runImpl(llvm::Function&amp;,
          llvm::ScalarEvolution*, llvm::TargetTransformInfo*,
          llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*,
          llvm::DominatorTree*, llvm::AssumptionCache*,
          llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) ()<br>
          #14 0x0000557ce2e7c7af in
          llvm::SLPVectorizerPass::run(llvm::Function&amp;,
          llvm::AnalysisManager&lt;llvm::Function&gt;&amp;) ()<br>
          #15 0x0000557ce2e7c5b2 in
          llvm::detail::PassModel&lt;llvm::Function,
          llvm::SLPVectorizerPass, llvm::PreservedAnalyses,
llvm::AnalysisManager&lt;llvm::Function&gt;&gt;::run(llvm::Function&amp;,
          llvm::AnalysisManager&lt;llvm::Function&gt;&amp;) ()<br>
          #16 0x0000557ce39153c7 in llvm::PassManager&lt;llvm::Function,
llvm::AnalysisManager&lt;llvm::Function&gt;&gt;::run(llvm::Function&amp;,
          llvm::AnalysisManager&lt;llvm::Function&gt;&amp;) ()<br>
          #17 0x0000557ce44d8c69 in
          llvm::detail::PassModel&lt;llvm::Module,
llvm::ModuleToFunctionPassAdaptor&lt;llvm::PassManager&lt;llvm::Function,
          llvm::AnalysisManager&lt;llvm::Function&gt;&gt; &gt;,
          llvm::PreservedAnalyses,
          llvm::AnalysisManager&lt;llvm::Module&gt;&gt;::run(llvm::Module&amp;,
          llvm::AnalysisManager&lt;llvm::Module&gt;&amp;) ()<br>
          #18 0x0000557ce2fda20b in llvm::PassManager&lt;llvm::Module,
          llvm::AnalysisManager&lt;llvm::Module&gt;&gt;::run(llvm::Module&amp;,
          llvm::AnalysisManager&lt;llvm::Module&gt;&amp;) ()<br>
          #19 0x0000557ce2fda0b2 in
          llvm::detail::PassModel&lt;llvm::Module,
          llvm::PassManager&lt;llvm::Module,
          llvm::AnalysisManager&lt;llvm::Module&gt;&gt;,
          llvm::PreservedAnalyses,
          llvm::AnalysisManager&lt;llvm::Module&gt;&gt;::run(llvm::Module&amp;,
          llvm::AnalysisManager&lt;llvm::Module&gt;&amp;) ()<br>
          #20 0x0000557ce2fda20b in llvm::PassManager&lt;llvm::Module,
          llvm::AnalysisManager&lt;llvm::Module&gt;&gt;::run(llvm::Module&amp;,
          llvm::AnalysisManager&lt;llvm::Module&gt;&amp;) ()<br>
          #21 0x0000557ce44b7c08 in (anonymous
namespace)::EmitAssemblyHelper::EmitAssemblyWithNewPassManager(clang::BackendAction,
          std::__u::unique_ptr&lt;llvm::raw_pwrite_stream,
          std::__u::default_delete&lt;llvm::raw_pwrite_stream&gt; &gt;)
          ()<br>
          #22 0x0000557ce31bb048 in
          clang::EmitBackendOutput(clang::DiagnosticsEngine&amp;,
          clang::HeaderSearchOptions const&amp;, clang::CodeGenOptions
          const&amp;, clang::TargetOptions const&amp;,
          clang::LangOptions const&amp;, llvm::DataLayout const&amp;,
          llvm::Module*, clang::BackendAction,
          std::__u::unique_ptr&lt;llvm::raw_pwrite_stream,
          std::__u::default_delete&lt;llvm::raw_pwrite_stream&gt; &gt;)
          ()<br>
          #23 0x0000557ce31ba7f6 in
          clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&amp;)
          ()<br>
          #24 0x0000557ce3005d80 in clang::ParseAST(clang::Sema&amp;,
          bool, bool) ()<br>
          #25 0x0000557ce3138515 in clang::FrontendAction::Execute() ()<br>
          #26 0x0000557ce3137b54 in
          clang::CompilerInstance::ExecuteAction(clang::FrontendAction&amp;)
          ()<br>
          #27 0x0000557ce3136bb0 in
          clang::ExecuteCompilerInvocation(clang::CompilerInstance*) ()<br>
          #28 0x0000557ce312a4ae in cc1_main(llvm::ArrayRef&lt;char
          const*&gt;, char const*, void*) ()<br>
          #29 0x0000557ce312997d in
          ExecuteCC1Tool(llvm::SmallVectorImpl&lt;char const*&gt;&amp;)
          ()<br>
          #30 0x0000557ce42ab049 in main ()<br>
        </div>
        <div><br>
        </div>
        <div>It looks to me like the control flow is stuck in
llvm::slpvectorizer::BoUpSLP::findRootOrder(llvm::SmallVector&lt;unsigned
          int, 4u&gt;&amp;) (), as I issued a GDB 'finish' command which
          didn't finish after ~5m.</div>
        <br>
        <div>Please let me know if you need more info!</div>
        <div><br>
        </div>
        <div>Best,</div>
      </div>
      <br>
      <div class="gmail_quote">
        <div dir="ltr" class="gmail_attr">On Fri, Sep 18, 2020 at 10:47
          PM Eric Christopher &lt;<a href="mailto:echristo@gmail.com"
            moz-do-not-send="true">echristo@gmail.com</a>&gt; wrote:<br>
        </div>
        <blockquote class="gmail_quote" style="margin:0px 0px 0px
          0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
          <div dir="ltr">Following up here:
            <div><br>
            </div>
            <div>After discussing with Alexey on IRC I've temporarily
              reverted this. Bogdan was seeing infinite loops in
              compilation and is going to follow up with a backtrace and
              a test case later if the backtrace isn't enough.</div>
            <div><br>
            </div>
            <div>Reverted thusly:</div>
            <div>echristo@athyra ~/s/llvm-project (master)> git push<br>
              To github.com:llvm/llvm-project.git<br>
                 b168bbfae42..ecfd8161bf4  master -> master<br>
            </div>
            <div><br>
            </div>
            <div>Thanks a ton Alexey, we'll get back to you asap.</div>
            <div><br>
            </div>
            <div>-eric</div>
            <div><br>
            </div>
          </div>
          <br>
          <div class="gmail_quote">
            <div dir="ltr" class="gmail_attr">On Fri, Sep 18, 2020 at
              9:38 AM Alexey Bataev via llvm-commits &lt;<a
                href="mailto:llvm-commits@lists.llvm.org"
                target="_blank" moz-do-not-send="true">llvm-commits@lists.llvm.org</a>&gt;
              wrote:<br>
            </div>
            <blockquote class="gmail_quote" style="margin:0px 0px 0px
              0.8ex;border-left:1px solid
              rgb(204,204,204);padding-left:1ex"><br>
              Author: Alexey Bataev<br>
              Date: 2020-09-18T09:34:59-04:00<br>
              New Revision: 455ca0ebb69210046928fedffe292420a30f89ad<br>
              <br>
              URL: <a
href="https://github.com/llvm/llvm-project/commit/455ca0ebb69210046928fedffe292420a30f89ad"
                rel="noreferrer" target="_blank" moz-do-not-send="true">https://github.com/llvm/llvm-project/commit/455ca0ebb69210046928fedffe292420a30f89ad</a><br>
              DIFF: <a
href="https://github.com/llvm/llvm-project/commit/455ca0ebb69210046928fedffe292420a30f89ad.diff"
                rel="noreferrer" target="_blank" moz-do-not-send="true">https://github.com/llvm/llvm-project/commit/455ca0ebb69210046928fedffe292420a30f89ad.diff</a><br>
              <br>
              LOG: [SLP] Allow reordering of vectorization trees with
              reused instructions.<br>
              <br>
              If some leaves have the same instructions to be
              vectorized, we may<br>
              incorrectly evaluate the best order for the root node (it
              is built for the<br>
              vector of instructions without repeated instructions and,
              thus, has less<br>
              elements than the root node). In this case we just can not
              try to reorder<br>
              the tree + we may calculate the wrong number of nodes that
              require the<br>
              same reordering.<br>
              For example, if the root node is \&lt;a+b, a+c, a+d,
              f+e\&gt;, then the leaves<br>
              are \&lt;a, a, a, f\&gt; and \&lt;b, c, d, e\&gt;. When we
              try to vectorize the first<br>
              leaf, it will be shrink to \&lt;a, b\&gt;. If instructions
              in this leaf should<br>
              be reordered, the best order will be \&lt;1, 0\&gt;. We
              need to extend this<br>
              order for the root node. For the root node this order
              should look like<br>
              \&lt;3, 0, 1, 2\&gt;. This patch allows extension of the
              orders of the nodes<br>
              with the reused instructions.<br>
              <br>
              Reviewed By: RKSimon<br>
              <br>
              Differential Revision: <a
                href="https://reviews.llvm.org/D45263" rel="noreferrer"
                target="_blank" moz-do-not-send="true">https://reviews.llvm.org/D45263</a><br>
              <br>
              Added: <br>
              <br>
              <br>
              Modified: <br>
                  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
                 
              llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll<br>
                 
              llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll<br>
                 
              llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll<br>
              <br>
              Removed: <br>
              <br>
              <br>
              <br>
################################################################################<br>
              diff  --git
              a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
              b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
              index c487301177c1..e4cad01e958a 100644<br>
              --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
              +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
              @@ -523,6 +523,15 @@ static bool isSimple(Instruction *I)
              {<br>
              <br>
               namespace llvm {<br>
              <br>
              +static void inversePermutation(ArrayRef<unsigned>
              Indices,<br>
              +                               SmallVectorImpl<int>
              &Mask) {<br>
              +  Mask.clear();<br>
              +  const unsigned E = Indices.size();<br>
              +  Mask.resize(E, E + 1);<br>
              +  for (unsigned I = 0; I < E; ++I)<br>
              +    Mask[Indices[I]] = I;<br>
              +}<br>
              +<br>
               namespace slpvectorizer {<br>
              <br>
               /// Bottom Up SLP Vectorizer.<br>
              @@ -537,6 +546,7 @@ class BoUpSLP {<br>
                 using StoreList = SmallVector<StoreInst *, 8>;<br>
                 using ExtraValueToDebugLocsMap =<br>
                     MapVector<Value *, SmallVector<Instruction *,
              2>>;<br>
              +  using OrdersType = SmallVector<unsigned, 4>;<br>
              <br>
                 BoUpSLP(Function *Func, ScalarEvolution *Se,
              TargetTransformInfo *Tti,<br>
                         TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo
              *Li,<br>
              @@ -614,6 +624,14 @@ class BoUpSLP {<br>
              <br>
                 /// \returns The best order of instructions for
              vectorization.<br>
                 Optional<ArrayRef<unsigned>> bestOrder()
              const {<br>
              +    assert(llvm::all_of(<br>
              +               NumOpsWantToKeepOrder,<br>
              +               [this](const
              decltype(NumOpsWantToKeepOrder)::value_type &D) {<br>
              +                 return D.getFirst().size() ==<br>
              +                       
              VectorizableTree[0]->Scalars.size();<br>
              +               }) &&<br>
              +           "All orders must have the same size as number
              of instructions in "<br>
              +           "tree node.");<br>
                   auto I = std::max_element(<br>
                       NumOpsWantToKeepOrder.begin(),
              NumOpsWantToKeepOrder.end(),<br>
                       [](const
              decltype(NumOpsWantToKeepOrder)::value_type &D1,<br>
              @@ -627,6 +645,79 @@ class BoUpSLP {<br>
                   return makeArrayRef(I->getFirst());<br>
                 }<br>
              <br>
              +  /// Builds the correct order for root instructions.<br>
              +  /// If some leaves have the same instructions to be
              vectorized, we may<br>
              +  /// incorrectly evaluate the best order for the root
              node (it is built for the<br>
              +  /// vector of instructions without repeated
              instructions and, thus, has less<br>
              +  /// elements than the root node). This function builds
              the correct order for<br>
              +  /// the root node.<br>
              +  /// For example, if the root node is \<a+b, a+c,
              a+d, f+e\>, then the leaves<br>
              +  /// are \<a, a, a, f\> and \<b, c, d, e\>.
              When we try to vectorize the first<br>
              +  /// leaf, it will be shrink to \<a, b\>. If
              instructions in this leaf should<br>
              +  /// be reordered, the best order will be \<1,
              0\>. We need to extend this<br>
              +  /// order for the root node. For the root node this
              order should look like<br>
              +  /// \<3, 0, 1, 2\>. This function extends the
              order for the reused<br>
              +  /// instructions.<br>
              +  void findRootOrder(OrdersType &Order) {<br>
              +    // If the leaf has the same number of instructions to
              vectorize as the root<br>
              +    // - order must be set already.<br>
              +    unsigned RootSize =
              VectorizableTree[0]->Scalars.size();<br>
              +    if (Order.size() == RootSize)<br>
              +      return;<br>
              +    SmallVector<unsigned, 4>
              RealOrder(Order.size());<br>
              +    std::swap(Order, RealOrder);<br>
              +    SmallVector<int, 4> Mask;<br>
              +    inversePermutation(RealOrder, Mask);<br>
              +    for (int I = 0, E = Mask.size(); I < E; ++I)<br>
              +      Order[I] = Mask[I];<br>
              +    // The leaf has less number of instructions - need to
              find the true order of<br>
              +    // the root.<br>
              +    // Scan the nodes starting from the leaf back to the
              root.<br>
              +    const TreeEntry *PNode =
              VectorizableTree.back().get();<br>
              +    while (PNode) {<br>
              +      const TreeEntry &Node = *PNode;<br>
              +      PNode = Node.UserTreeIndices.back().UserTE;<br>
              +      if (Node.ReuseShuffleIndices.empty())<br>
              +        continue;<br>
              +      // Build the order for the parent node.<br>
              +      OrdersType
              NewOrder(Node.ReuseShuffleIndices.size(), RootSize);<br>
              +      SmallVector<unsigned, 4>
              OrderCounter(Order.size(), 0);<br>
              +      // The algorithm of the order extension is:<br>
              +      // 1. Calculate the number of the same instructions
              for the order.<br>
              +      // 2. Calculate the index of the new order: total
              number of instructions<br>
              +      // with order less than the order of the current
              instruction + reuse<br>
              +      // number of the current instruction.<br>
              +      // 3. The new order is just the index of the
              instruction in the original<br>
              +      // vector of the instructions.<br>
              +      for (unsigned I : Node.ReuseShuffleIndices)<br>
              +        ++OrderCounter[Order[I]];<br>
              +      SmallVector<unsigned, 4>
              CurrentCounter(Order.size(), 0);<br>
              +      for (unsigned I = 0, E =
              Node.ReuseShuffleIndices.size(); I < E; ++I) {<br>
              +        unsigned ReusedIdx = Node.ReuseShuffleIndices[I];<br>
              +        unsigned OrderIdx = Order[ReusedIdx];<br>
              +        unsigned NewIdx = 0;<br>
              +        for (unsigned J = 0; J < OrderIdx; ++J)<br>
              +          NewIdx += OrderCounter[J];<br>
              +        NewIdx += CurrentCounter[OrderIdx];<br>
              +        ++CurrentCounter[OrderIdx];<br>
              +        assert(NewOrder[NewIdx] == RootSize &&<br>
              +               "The order index should not be written
              already.");<br>
              +        NewOrder[NewIdx] = I;<br>
              +      }<br>
              +      std::swap(Order, NewOrder);<br>
              +      // If the size of the order is the same as number
              of instructions in the<br>
              +      // root node, no need to extend it more.<br>
              +      if (Order.size() == RootSize)<br>
              +        break;<br>
              +    }<br>
              +    assert((!PNode || Order.size() == RootSize)
              &&<br>
              +           "Root node is expected or the size of the
              order must be the same as "<br>
              +           "the number of elements in the root node.");<br>
              +    assert(llvm::all_of(Order,<br>
              +                        [RootSize](unsigned Val) { return
              Val != RootSize; }) &&<br>
              +           "All indices must be initialized");<br>
              +  }<br>
              +<br>
                 /// \return The vector element size in bits to use when
              vectorizing the<br>
                 /// expression tree ending at \p V. If V is a store,
              the size is the width of<br>
                 /// the stored value. Otherwise, the size is the width
              of the largest loaded<br>
              @@ -1467,7 +1558,7 @@ class BoUpSLP {<br>
                   SmallVector<int, 4> ReuseShuffleIndices;<br>
              <br>
                   /// Does this entry require reordering?<br>
              -    ArrayRef<unsigned> ReorderIndices;<br>
              +    SmallVector<unsigned, 4> ReorderIndices;<br>
              <br>
                   /// Points back to the VectorizableTree.<br>
                   ///<br>
              @@ -1660,7 +1751,7 @@ class BoUpSLP {<br>
                   Last->State = Vectorized ? TreeEntry::Vectorize :
              TreeEntry::NeedToGather;<br>
                 
               Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),<br>
                                                   
              ReuseShuffleIndices.end());<br>
              -    Last->ReorderIndices = ReorderIndices;<br>
              +   
              Last->ReorderIndices.append(ReorderIndices.begin(),
              ReorderIndices.end());<br>
                   Last->setOperations(S);<br>
                   if (Vectorized) {<br>
                     for (int i = 0, e = VL.size(); i != e; ++i) {<br>
              @@ -2197,7 +2288,6 @@ class BoUpSLP {<br>
                 /// List of users to ignore during scheduling and that
              don't need extracting.<br>
                 ArrayRef<Value *> UserIgnoreList;<br>
              <br>
              -  using OrdersType = SmallVector<unsigned, 4>;<br>
                 /// A DenseMapInfo implementation for holding DenseMaps
              and DenseSets of<br>
                 /// sorted SmallVectors of unsigned.<br>
                 struct OrdersTypeDenseMapInfo {<br>
              @@ -2659,12 +2749,10 @@ void
              BoUpSLP::buildTree_rec(ArrayRef<Value *> VL,
              unsigned Depth,<br>
                       });<br>
                       // Insert new order with initial value 0, if it
              does not exist,<br>
                       // otherwise return the iterator to the existing
              one.<br>
              -        auto StoredCurrentOrderAndNum =<br>
              -           
              NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;<br>
              -        ++StoredCurrentOrderAndNum->getSecond();<br>
                       newTreeEntry(VL, Bundle /*vectorized*/, S,
              UserTreeIdx,<br>
              -                     ReuseShuffleIndicies,<br>
              -                   
               StoredCurrentOrderAndNum->getFirst());<br>
              +                     ReuseShuffleIndicies, CurrentOrder);<br>
              +        findRootOrder(CurrentOrder);<br>
              +        ++NumOpsWantToKeepOrder[CurrentOrder];<br>
                       // This is a special case, as it does not gather,
              but at the same time<br>
                       // we are not extending buildTree_rec() towards
              the operands.<br>
                       ValueList Op0;<br>
              @@ -2741,13 +2829,13 @@ void
              BoUpSLP::buildTree_rec(ArrayRef<Value *> VL,
              unsigned Depth,<br>
                           LLVM_DEBUG(dbgs() << "SLP: added a
              vector of loads.\n");<br>
                         } else {<br>
                           // Need to reorder.<br>
              -            auto I =
              NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;<br>
              -            ++I->getSecond();<br>
                           TreeEntry *TE =<br>
                               newTreeEntry(VL, Bundle /*vectorized*/,
              S, UserTreeIdx,<br>
              -                             ReuseShuffleIndicies,
              I->getFirst());<br>
              +                             ReuseShuffleIndicies,
              CurrentOrder);<br>
                           TE->setOperandsInOrder();<br>
                           LLVM_DEBUG(dbgs() << "SLP: added a
              vector of jumbled loads.\n");<br>
              +            findRootOrder(CurrentOrder);<br>
              +            ++NumOpsWantToKeepOrder[CurrentOrder];<br>
                         }<br>
                         return;<br>
                       }<br>
              @@ -3003,15 +3091,14 @@ void
              BoUpSLP::buildTree_rec(ArrayRef<Value *> VL,
              unsigned Depth,<br>
                           buildTree_rec(Operands, Depth + 1, {TE, 0});<br>
                           LLVM_DEBUG(dbgs() << "SLP: added a
              vector of stores.\n");<br>
                         } else {<br>
              -            // Need to reorder.<br>
              -            auto I =
              NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;<br>
              -            ++(I->getSecond());<br>
                           TreeEntry *TE =<br>
                               newTreeEntry(VL, Bundle /*vectorized*/,
              S, UserTreeIdx,<br>
              -                             ReuseShuffleIndicies,
              I->getFirst());<br>
              +                             ReuseShuffleIndicies,
              CurrentOrder);<br>
                           TE->setOperandsInOrder();<br>
                           buildTree_rec(Operands, Depth + 1, {TE, 0});<br>
                           LLVM_DEBUG(dbgs() << "SLP: added a
              vector of jumbled stores.\n");<br>
              +            findRootOrder(CurrentOrder);<br>
              +            ++NumOpsWantToKeepOrder[CurrentOrder];<br>
                         }<br>
                         return;<br>
                       }<br>
              @@ -4141,15 +4228,6 @@ Value
              *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {<br>
                 return V;<br>
               }<br>
              <br>
              -static void inversePermutation(ArrayRef<unsigned>
              Indices,<br>
              -                               SmallVectorImpl<int>
              &Mask) {<br>
              -  Mask.clear();<br>
              -  const unsigned E = Indices.size();<br>
              -  Mask.resize(E);<br>
              -  for (unsigned I = 0; I < E; ++I)<br>
              -    Mask[Indices[I]] = I;<br>
              -}<br>
              -<br>
               Value *BoUpSLP::vectorizeTree(TreeEntry *E) {<br>
                 IRBuilder<>::InsertPointGuard Guard(Builder);<br>
              <br>
              @@ -6873,8 +6951,10 @@ class HorizontalReduction {<br>
                     ArrayRef<Value *> VL =
              makeArrayRef(&ReducedVals[i], ReduxWidth);<br>
                     V.buildTree(VL, ExternallyUsedValues, IgnoreList);<br>
                     Optional<ArrayRef<unsigned>> Order =
              V.bestOrder();<br>
              -      // TODO: Handle orders of size less than number of
              elements in the vector.<br>
              -      if (Order && Order->size() == VL.size())
              {<br>
              +      if (Order) {<br>
              +        assert(Order->size() == VL.size() &&<br>
              +               "Order size must be the same as number of
              vectorized "<br>
              +               "instructions.");<br>
                       // TODO: reorder tree nodes without tree
              rebuilding.<br>
                       SmallVector<Value *, 4>
              ReorderedOps(VL.size());<br>
                       llvm::transform(*Order, ReorderedOps.begin(),<br>
              <br>
              diff  --git
              a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll<br>
              index 8b12b9272c7e..a84b1f7e4fcd 100644<br>
              ---
              a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll<br>
              +++
              b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll<br>
              @@ -11,7 +11,7 @@<br>
               @h = common dso_local global float 0.000000e+00, align 4<br>
              <br>
               define dso_local void @j() local_unnamed_addr {<br>
              -; CHECK-LABEL: define {{[^@]+}}@j(<br>
              +; CHECK-LABEL: @j(<br>
               ; CHECK-NEXT:  entry:<br>
               ; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @b,
              align 8<br>
               ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr
              inbounds i32, i32* [[TMP0]], i64 4<br>
              @@ -19,42 +19,39 @@ define dso_local void @j()
              local_unnamed_addr {<br>
               ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr
              inbounds i32, i32* [[TMP0]], i64 5<br>
               ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]]
              to <2 x i32>*<br>
               ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>,
              <2 x i32>* [[TMP1]], align 4<br>
              -; CHECK-NEXT:    [[REORDER_SHUFFLE1:%.*]] = shufflevector
              <2 x i32> [[TMP2]], <2 x i32> undef, <2 x
              i32> <i32 1, i32 0><br>
               ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr
              inbounds i32, i32* [[TMP0]], i64 13<br>
               ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32*
              [[ARRAYIDX1]] to <2 x i32>*<br>
               ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>,
              <2 x i32>* [[TMP3]], align 4<br>
              -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector
              <2 x i32> [[TMP4]], <2 x i32> undef, <2 x
              i32> <i32 1, i32 0><br>
              -; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32>
              [[REORDER_SHUFFLE]], [[REORDER_SHUFFLE1]]<br>
              +; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32>
              [[TMP4]], [[TMP2]]<br>
               ; CHECK-NEXT:    [[TMP6:%.*]] = sitofp <2 x i32>
              [[TMP5]] to <2 x float><br>
               ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x float>
              [[TMP6]], <float 1.000000e+01, float 1.000000e+01><br>
              -; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x float>
              <float 0.000000e+00, float 1.000000e+00>, [[TMP7]]<br>
              -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x
              float> [[TMP8]], <2 x float> undef, <4 x
              i32> <i32 0, i32 1, i32 0, i32 1><br>
              -; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x
              float> [[SHUFFLE]], i32 1<br>
              +; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x float>
              <float 1.000000e+00, float 0.000000e+00>, [[TMP7]]<br>
              +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x
              float> [[TMP8]], <2 x float> undef, <4 x
              i32> <i32 0, i32 0, i32 1, i32 1><br>
              +; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x
              float> [[SHUFFLE]], i32 0<br>
               ; CHECK-NEXT:    store float [[TMP9]], float* @g, align 4<br>
              -; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float>
              [[SHUFFLE]], <float -1.000000e+00, float -1.000000e+00,
              float 1.000000e+00, float 1.000000e+00><br>
              -; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x
              float> [[TMP10]], i32 2<br>
              +; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float>
              [[SHUFFLE]], <float -1.000000e+00, float 1.000000e+00,
              float -1.000000e+00, float 1.000000e+00><br>
              +; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x
              float> [[TMP10]], i32 3<br>
               ; CHECK-NEXT:    store float [[TMP11]], float* @c, align
              4<br>
              -; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x
              float> [[TMP10]], i32 0<br>
              +; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x
              float> [[TMP10]], i32 2<br>
               ; CHECK-NEXT:    store float [[TMP12]], float* @d, align
              4<br>
              -; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x
              float> [[TMP10]], i32 3<br>
              +; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x
              float> [[TMP10]], i32 1<br>
               ; CHECK-NEXT:    store float [[TMP13]], float* @e, align
              4<br>
              -; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x
              float> [[TMP10]], i32 1<br>
              +; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x
              float> [[TMP10]], i32 0<br>
               ; CHECK-NEXT:    store float [[TMP14]], float* @f, align
              4<br>
               ; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr
              inbounds i32, i32* [[TMP0]], i64 14<br>
               ; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr
              inbounds i32, i32* [[TMP0]], i64 15<br>
               ; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* @a, align
              4<br>
               ; CHECK-NEXT:    [[CONV19:%.*]] = sitofp i32 [[TMP15]] to
              float<br>
              -; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x
              float> undef, float [[CONV19]], i32 0<br>
              -; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x
              float> [[TMP16]], float -1.000000e+00, i32 1<br>
              -; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x
              float> [[SHUFFLE]], i32 0<br>
              -; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x
              float> [[TMP17]], float [[TMP18]], i32 2<br>
              -; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x
              float> [[TMP19]], float -1.000000e+00, i32 3<br>
              -; CHECK-NEXT:    [[TMP21:%.*]] = fsub <4 x float>
              [[TMP10]], [[TMP20]]<br>
              -; CHECK-NEXT:    [[TMP22:%.*]] = fadd <4 x float>
              [[TMP10]], [[TMP20]]<br>
              -; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x
              float> [[TMP21]], <4 x float> [[TMP22]], <4 x
              i32> <i32 0, i32 5, i32 2, i32 7><br>
              -; CHECK-NEXT:    [[TMP24:%.*]] = fptosi <4 x float>
              [[TMP23]] to <4 x i32><br>
              -; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i32*
              [[ARRAYIDX1]] to <4 x i32>*<br>
              -; CHECK-NEXT:    store <4 x i32> [[TMP24]], <4 x
              i32>* [[TMP25]], align 4<br>
              +; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x
              float> <float -1.000000e+00, float -1.000000e+00,
              float undef, float undef>, float [[CONV19]], i32 2<br>
              +; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x
              float> [[SHUFFLE]], i32 2<br>
              +; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x
              float> [[TMP16]], float [[TMP17]], i32 3<br>
              +; CHECK-NEXT:    [[TMP19:%.*]] = fadd <4 x float>
              [[TMP10]], [[TMP18]]<br>
              +; CHECK-NEXT:    [[TMP20:%.*]] = fsub <4 x float>
              [[TMP10]], [[TMP18]]<br>
              +; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x
              float> [[TMP19]], <4 x float> [[TMP20]], <4 x
              i32> <i32 0, i32 1, i32 6, i32 7><br>
              +; CHECK-NEXT:    [[TMP22:%.*]] = fptosi <4 x float>
              [[TMP21]] to <4 x i32><br>
              +; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector
              <4 x i32> [[TMP22]], <4 x i32> undef, <4 x
              i32> <i32 2, i32 0, i32 3, i32 1><br>
              +; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32*
              [[ARRAYIDX1]] to <4 x i32>*<br>
              +; CHECK-NEXT:    store <4 x i32>
              [[REORDER_SHUFFLE]], <4 x i32>* [[TMP23]], align 4<br>
               ; CHECK-NEXT:    ret void<br>
               ;<br>
               entry:<br>
              <br>
              diff  --git
              a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll<br>
              index 384e540efb79..9ed21a1c3f8c 100644<br>
              ---
              a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll<br>
              +++
              b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll<br>
              @@ -14,11 +14,10 @@ define void @hoge() {<br>
               ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x
              i16> undef, i16 [[T]], i32 0<br>
               ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x
              i16> [[TMP0]], i16 undef, i32 1<br>
               ; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i16>
              [[TMP1]] to <2 x i32><br>
              -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector
              <2 x i32> [[TMP2]], <2 x i32> undef, <2 x
              i32> <i32 1, i32 0><br>
              -; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32>
              <i32 63, i32 undef>, [[REORDER_SHUFFLE]]<br>
              +; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32>
              <i32 undef, i32 63>, [[TMP2]]<br>
               ; CHECK-NEXT:    [[TMP4:%.*]] = sub <2 x i32>
              [[TMP3]], undef<br>
              -; CHECK-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <2 x
              i32> [[TMP4]], <2 x i32> undef, <4 x i32>
              <i32 0, i32 1, i32 1, i32 1><br>
              -; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32>
              [[SHUFFLE5]], <i32 undef, i32 15, i32 31, i32 47><br>
              +; CHECK-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <2 x
              i32> [[TMP4]], <2 x i32> undef, <4 x i32>
              <i32 0, i32 0, i32 0, i32 1><br>
              +; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32>
              [[SHUFFLE5]], <i32 15, i32 31, i32 47, i32 undef><br>
               ; CHECK-NEXT:    [[TMP6:%.*]] = call i32
              @llvm.experimental.vector.reduce.smax.v4i32(<4 x
              i32> [[TMP5]])<br>
               ; CHECK-NEXT:    [[T19:%.*]] = select i1 undef, i32
              [[TMP6]], i32 undef<br>
               ; CHECK-NEXT:    [[T20:%.*]] = icmp sgt i32 [[T19]], 63<br>
              <br>
              diff  --git
              a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll<br>
              index b7cff2dac5d4..02e7c5b37f3e 100644<br>
              ---
              a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll<br>
              +++
              b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll<br>
              @@ -7,16 +7,15 @@ define i32 @foo(i32* nocapture readonly
              %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4<br>
               ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr
              inbounds i32, i32* [[ARR:%.*]], i64 1<br>
               ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR]] to
              <2 x i32>*<br>
               ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>,
              <2 x i32>* [[TMP0]], align 4<br>
              -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector
              <2 x i32> [[TMP1]], <2 x i32> undef, <2 x
              i32> <i32 1, i32 0><br>
              -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x
              i32> [[REORDER_SHUFFLE]], <2 x i32> undef, <8
              x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
              i32 1, i32 1><br>
              -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x
              i32> undef, i32 [[A1:%.*]], i32 0<br>
              -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x
              i32> [[TMP2]], i32 [[A2:%.*]], i32 1<br>
              -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x
              i32> [[TMP3]], i32 [[A3:%.*]], i32 2<br>
              -; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x
              i32> [[TMP4]], i32 [[A4:%.*]], i32 3<br>
              -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x
              i32> [[TMP5]], i32 [[A5:%.*]], i32 4<br>
              -; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x
              i32> [[TMP6]], i32 [[A6:%.*]], i32 5<br>
              -; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x
              i32> [[TMP7]], i32 [[A7:%.*]], i32 6<br>
              -; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x
              i32> [[TMP8]], i32 [[A8:%.*]], i32 7<br>
              +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x
              i32> [[TMP1]], <2 x i32> undef, <8 x i32>
              <i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32
              1><br>
              +; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x
              i32> undef, i32 [[A7:%.*]], i32 0<br>
              +; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x
              i32> [[TMP2]], i32 [[A8:%.*]], i32 1<br>
              +; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x
              i32> [[TMP3]], i32 [[A1:%.*]], i32 2<br>
              +; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x
              i32> [[TMP4]], i32 [[A2:%.*]], i32 3<br>
              +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x
              i32> [[TMP5]], i32 [[A3:%.*]], i32 4<br>
              +; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x
              i32> [[TMP6]], i32 [[A4:%.*]], i32 5<br>
              +; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x
              i32> [[TMP7]], i32 [[A5:%.*]], i32 6<br>
              +; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x
              i32> [[TMP8]], i32 [[A6:%.*]], i32 7<br>
               ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32>
              [[SHUFFLE]], [[TMP9]]<br>
               ; CHECK-NEXT:    [[TMP11:%.*]] = call i32
              @llvm.experimental.vector.reduce.umin.v8i32(<8 x
              i32> [[TMP10]])<br>
               ; CHECK-NEXT:    ret i32 [[TMP11]]<br>
              @@ -58,16 +57,15 @@ define i32 @foo1(i32* nocapture
              readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a<br>
               ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr
              inbounds i32, i32* [[ARR]], i64 3<br>
               ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR]] to
              <4 x i32>*<br>
               ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>,
              <4 x i32>* [[TMP0]], align 4<br>
              -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector
              <4 x i32> [[TMP1]], <4 x i32> undef, <4 x
              i32> <i32 1, i32 2, i32 3, i32 0><br>
              -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x
              i32> [[REORDER_SHUFFLE]], <4 x i32> undef, <8
              x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 3,
              i32 1, i32 0><br>
              -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x
              i32> undef, i32 [[A1:%.*]], i32 0<br>
              -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x
              i32> [[TMP2]], i32 [[A2:%.*]], i32 1<br>
              -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x
              i32> [[TMP3]], i32 [[A3:%.*]], i32 2<br>
              -; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x
              i32> [[TMP4]], i32 [[A4:%.*]], i32 3<br>
              -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x
              i32> [[TMP5]], i32 [[A5:%.*]], i32 4<br>
              -; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x
              i32> [[TMP6]], i32 [[A6:%.*]], i32 5<br>
              +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x
              i32> [[TMP1]], <4 x i32> undef, <8 x i32>
              <i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32
              3><br>
              +; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x
              i32> undef, i32 [[A6:%.*]], i32 0<br>
              +; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x
              i32> [[TMP2]], i32 [[A1:%.*]], i32 1<br>
              +; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x
              i32> [[TMP3]], i32 [[A4:%.*]], i32 2<br>
              +; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x
              i32> [[TMP4]], i32 [[A5:%.*]], i32 3<br>
              +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x
              i32> [[TMP5]], i32 [[A8:%.*]], i32 4<br>
              +; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x
              i32> [[TMP6]], i32 [[A2:%.*]], i32 5<br>
               ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x
              i32> [[TMP7]], i32 [[A7:%.*]], i32 6<br>
              -; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x
              i32> [[TMP8]], i32 [[A8:%.*]], i32 7<br>
              +; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x
              i32> [[TMP8]], i32 [[A3:%.*]], i32 7<br>
               ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32>
              [[SHUFFLE]], [[TMP9]]<br>
               ; CHECK-NEXT:    [[TMP11:%.*]] = call i32
              @llvm.experimental.vector.reduce.umin.v8i32(<8 x
              i32> [[TMP10]])<br>
               ; CHECK-NEXT:    ret i32 [[TMP11]]<br>
              @@ -113,16 +111,15 @@ define i32 @foo2(i32* nocapture
              readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a<br>
               ; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr
              inbounds i32, i32* [[ARR]], i64 1<br>
               ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR]] to
              <4 x i32>*<br>
               ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>,
              <4 x i32>* [[TMP0]], align 4<br>
              -; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector
              <4 x i32> [[TMP1]], <4 x i32> undef, <4 x
              i32> <i32 3, i32 2, i32 0, i32 1><br>
              -; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x
              i32> [[REORDER_SHUFFLE]], <4 x i32> undef, <8
              x i32> <i32 0, i32 1, i32 0, i32 2, i32 3, i32 2,
              i32 1, i32 3><br>
              -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x
              i32> undef, i32 [[A1:%.*]], i32 0<br>
              -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x
              i32> [[TMP2]], i32 [[A2:%.*]], i32 1<br>
              -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x
              i32> [[TMP3]], i32 [[A3:%.*]], i32 2<br>
              -; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x
              i32> [[TMP4]], i32 [[A4:%.*]], i32 3<br>
              -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x
              i32> [[TMP5]], i32 [[A5:%.*]], i32 4<br>
              -; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x
              i32> [[TMP6]], i32 [[A6:%.*]], i32 5<br>
              -; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x
              i32> [[TMP7]], i32 [[A7:%.*]], i32 6<br>
              -; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x
              i32> [[TMP8]], i32 [[A8:%.*]], i32 7<br>
              +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x
              i32> [[TMP1]], <4 x i32> undef, <8 x i32>
              <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32
              3><br>
              +; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x
              i32> undef, i32 [[A4:%.*]], i32 0<br>
              +; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x
              i32> [[TMP2]], i32 [[A6:%.*]], i32 1<br>
              +; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x
              i32> [[TMP3]], i32 [[A5:%.*]], i32 2<br>
              +; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x
              i32> [[TMP4]], i32 [[A8:%.*]], i32 3<br>
              +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x
              i32> [[TMP5]], i32 [[A2:%.*]], i32 4<br>
              +; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x
              i32> [[TMP6]], i32 [[A7:%.*]], i32 5<br>
              +; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x
              i32> [[TMP7]], i32 [[A1:%.*]], i32 6<br>
              +; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x
              i32> [[TMP8]], i32 [[A3:%.*]], i32 7<br>
               ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32>
              [[SHUFFLE]], [[TMP9]]<br>
               ; CHECK-NEXT:    [[TMP11:%.*]] = call i32
              @llvm.experimental.vector.reduce.umin.v8i32(<8 x
              i32> [[TMP10]])<br>
               ; CHECK-NEXT:    ret i32 [[TMP11]]<br>
              <br>
              <br>
              <br>
              _______________________________________________<br>
              llvm-commits mailing list<br>
              <a href="mailto:llvm-commits@lists.llvm.org"
                target="_blank" moz-do-not-send="true">llvm-commits@lists.llvm.org</a><br>
              <a
                href="https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits"
                rel="noreferrer" target="_blank" moz-do-not-send="true">https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
            </blockquote>
          </div>
        </blockquote>
      </div>
      <br clear="all">
      <div><br>
      </div>
      -- <br>
      <div dir="ltr" class="gmail_signature">
        <div dir="ltr">
          <div>
            <div dir="ltr">
              <div>
                <div dir="ltr">
                  <div>
                    <div dir="ltr">
                      <div>
                        <div dir="ltr">
                          <div>
                            <div dir="ltr">
                              <div dir="ltr">
                                <div dir="ltr">
                                  <div dir="ltr">
                                    <div dir="ltr">
                                      <div dir="ltr">
                                        <div><br>
                                        </div>
                                        <div dir="ltr"><br>
                                          <table
                                            style="font-family:'Times
                                            New Roman'"
                                            cellspacing="0"
                                            cellpadding="0">
                                            <tbody>
                                              <tr
                                                style="color:rgb(85,85,85);font-family:sans-serif;font-size:small">
                                                <td
                                                  style="border-top:2px
                                                  solid rgb(213,15,37)"
                                                  nowrap="nowrap">Bogdan
                                                  Graur |</td>
                                                <td
                                                  style="border-top:2px
                                                  solid rgb(51,105,232)"
                                                  nowrap="nowrap"> Software
                                                  Engineer |</td>
                                                <td
                                                  style="border-top:2px
                                                  solid rgb(0,153,57)"
                                                  nowrap="nowrap"> <a
                                                    href="mailto:bgraur@google.com"
                                                    target="_blank"
                                                    moz-do-not-send="true">bgraur@google.com</a> |</td>
                                                <td
                                                  style="border-top:2px
                                                  solid rgb(238,178,17)"
                                                  nowrap="nowrap"> </td>
                                              </tr>
                                            </tbody>
                                          </table>
                                          <br>
                                          <p dir="ltr"
                                            style="line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style="font-size:13.3333px;font-family:Arial;color:rgb(102,102,102);vertical-align:baseline;white-space:pre-wrap;background-color:transparent"><img src="https://lh5.googleusercontent.com/lZxb5_FqaK3XHpcXHcC3bqbybXCvLOJo8YI5cG2pek020GNyVomybZ2sBHAWoWrfQmSPxQJ0FZE_SVm-2rIpB6d43geVSTRN7xT4gUcXLtEawiFbLNAaannVmJ3rqGLRD8k=s1600" alt="" style="border: none;" moz-do-not-send="true" width="100" height="58"></span></p>
                                          <p dir="ltr"
                                            style="line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style="font-size:13.3333px;font-family:Arial;vertical-align:baseline;white-space:pre-wrap;background-color:transparent"><font color="#999999">Google Germany GmbH</font></span></p>
                                          <div
                                            style="font-size:12.8px;font-family:Arial,Verdana,sans-serif">
                                            <div
style="font-family:arial,sans-serif;font-size:12px;line-height:16.6667px"><font
                                                size="2" face="verdana,
                                                sans-serif"
                                                color="#999999">Erika-Mann-Str.
                                                3</font><font size="2"
                                                face="verdana,
                                                sans-serif"
                                                color="#999999">3</font></div>
                                          </div>
                                          <div
                                            style="font-size:12.8px;font-family:Arial,Verdana,sans-serif"><font
                                              color="#999999">D-80636
                                              Munich</font></div>
                                          <br>
                                          <p dir="ltr"
                                            style="margin-top:0pt;margin-bottom:0pt"><font
                                              size="1"><span style="line-height:1.38;font-family:Arial;color:rgb(183,183,183);vertical-align:baseline;white-space:pre-wrap;background-color:transparent">Geschäftsführer:</span><font
                                                color="#999999"><span style="line-height:1.38;font-family:Arial;vertical-align:baseline;white-space:pre-wrap;background-color:transparent"> </span></font></font><span style="color:rgb(183,183,183);font-family:Arial;font-size:10.6667px;white-space:pre-wrap"> Paul Manicle, Halimah DeLaine Prado</span></p>
                                          <p dir="ltr"
                                            style="line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style="font-size:10.6667px;font-family:Arial;color:rgb(183,183,183);vertical-align:baseline;white-space:pre-wrap;background-color:transparent">Registergericht und -nummer: Hamburg, HRB 86891</span></p>
                                          <p dir="ltr"
                                            style="line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style="font-size:10.6667px;font-family:Arial;color:rgb(183,183,183);vertical-align:baseline;white-space:pre-wrap;background-color:transparent">Sitz der Gesellschaft: Hamburg</span></p>
                                          <br>
                                          <p dir="ltr"
                                            style="line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style="font-size:10.6667px;font-family:Arial;color:rgb(183,183,183);vertical-align:baseline;white-space:pre-wrap;background-color:transparent">Diese E-Mail ist vertraulich. Wenn Sie nicht der richtige Adressat sind, leiten Sie diese bitte nicht weiter, informieren Sie den Absender und löschen Sie die E-Mail und alle Anhänge. Vielen Dank.</span></p>
                                          <p dir="ltr"
                                            style="line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style="font-size:10.6667px;font-family:Arial;color:rgb(183,183,183);vertical-align:baseline;white-space:pre-wrap;background-color:transparent">       </span></p>
                                          <span style="font-size:10.6667px;font-family:Arial;color:rgb(183,183,183);vertical-align:baseline;white-space:pre-wrap;background-color:transparent">This e-mail is confidential. If you are not the right addressee please do not forward it, please inform the sender, and please erase this e-mail including any attachments. Thanks.</span><br
style="font-family:'Times New Roman';font-size:medium">
                                        </div>
                                      </div>
                                    </div>
                                  </div>
                                </div>
                              </div>
                            </div>
                          </div>
                        </div>
                      </div>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </blockquote>
  </body>
</html>