[llvm] r372626 - [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L) && "SCEVAddRecExpr operand is not loop-invariant!")
    Jordan Rupprecht via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Thu Sep 26 15:10:44 PDT 2019
    
    
  
Temporarily reverted as r373019
On Thu, Sep 26, 2019 at 2:55 PM Jordan Rupprecht <rupprecht at google.com>
wrote:
> Looks like this causes some crashers now. On the unreduced C++ source we
> see it w/ -fexperimental-new-pass-manager but on the reduced case it seems
> to not matter. (Strange...)
>
> Anyway, the repro: $ clang -O3 -c reduced.ll
>
> Where reduced.ll is:
>
> ; ModuleID = 'reduced.ll'
> source_filename = "reduced.ll"
> target triple = "x86_64-unknown-linux-gnu"
>
> @k = external dso_local constant [8 x [4 x i32]], align 16
> @l = external dso_local global [366 x i32], align 16
>
> define void @n() {
> entry:
>   %i = alloca i32, align 4
>   %a = alloca i32, align 4
>   %b = alloca i32, align 4
>   %c = alloca i32, align 4
>   %cb = alloca i32, align 4
>   %cw = alloca i32, align 4
>   %d = alloca i32, align 4
>   %e = alloca i32, align 4
>   br label %for.cond
>
> for.cond:                                         ; preds = %for.end17,
> %entry
>   %0 = load i32, i32* %i, align 4
>   %add = add nsw i32 %0, -183
>   store i32 %add, i32* %a, align 4
>   store i32 0, i32* %c, align 4
>   store i32 0, i32* %cb, align 4
>   br label %for.cond3
>
> for.cond3:                                        ; preds = %for.end,
> %for.cond
>   %1 = load i32, i32* %cb, align 4
>   %cmp4 = icmp slt i32 %1, 8
>   br i1 %cmp4, label %for.body5, label %for.end17
>
> for.body5:                                        ; preds = %for.cond3
>   store i32 0, i32* %cw, align 4
>   br label %for.cond6
>
> for.cond6:                                        ; preds = %if.end14,
> %for.body5
>   %2 = load i32, i32* %cw, align 4
>   %cmp7 = icmp slt i32 %2, 4
>   %3 = load i32, i32* %cb, align 4
>   br i1 %cmp7, label %for.body8, label %for.end
>
> for.body8:                                        ; preds = %for.cond6
>   %g = sext i32 %3 to i64
>   %arrayidx = getelementptr inbounds [8 x [4 x i32]], [8 x [4 x i32]]* @k,
> i64 0, i64 %g
>   %4 = load i32, i32* %cw, align 4
>   %f = sext i32 %4 to i64
>   %h = getelementptr inbounds [4 x i32], [4 x i32]* %arrayidx, i64 0, i64
> %f
>   %5 = load i32, i32* %h, align 4
>   store i32 %5, i32* %d, align 4
>   %6 = load i32, i32* %a, align 4
>   %7 = load i32, i32* %d, align 4
>   %sub = sub nsw i32 %6, %7
>   %j = call i32 @abs(i32 %sub)
>   store i32 %j, i32* %e, align 4
>   %8 = load i32, i32* %e, align 4
>   %9 = load i32, i32* %b, align 4
>   %cmp12 = icmp slt i32 %8, %9
>   br i1 %cmp12, label %if.then13, label %if.end14
>
> if.then13:                                        ; preds = %for.body8
>   %10 = load i32, i32* %cb, align 4
>   store i32 %10, i32* %c, align 4
>   %11 = load i32, i32* %e, align 4
>   store i32 %11, i32* %b, align 4
>   br label %if.end14
>
> if.end14:                                         ; preds = %if.then13,
> %for.body8
>   %12 = load i32, i32* %cw, align 4
>   %inc = add nsw i32 %12, 1
>   store i32 %inc, i32* %cw, align 4
>   br label %for.cond6
>
> for.end:                                          ; preds = %for.cond6
>   %inc16 = add nsw i32 %3, 1
>   store i32 %inc16, i32* %cb, align 4
>   br label %for.cond3
>
> for.end17:                                        ; preds = %for.cond3
>   %13 = load i32, i32* %c, align 4
>   %14 = load i32, i32* %i, align 4
>   %g18 = sext i32 %14 to i64
>   %k = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 %g18
>   store i32 %13, i32* %k, align 4
>   %15 = load i32, i32* %i, align 4
>   %inc21 = add nsw i32 %15, 1
>   store i32 %inc21, i32* %i, align 4
>   br label %for.cond
> }
>
> declare i32 @abs(i32)
>
> =>
>
> Stack dump:
> 0.      Program arguments: /src/llvm-build/dev/bin/clang-10 -cc1 -triple
> x86_64-unknown-linux-gnu -emit-obj -disable-free -disable-llvm-verifier
> -discard-value-names -main-file-name reduced.ll -mrelocation-model static
> -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose
> -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64
> -dwarf-column-info -debugger-tuning=gdb -coverage-notes-file
> /tmp/crash/reduced.gcno -resource-dir /src/llvm-build/dev/lib/clang/10.0.0
> -O3 -fdebug-compilation-dir /tmp/crash -ferror-limit 19 -fmessage-length 0
> -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops
> -vectorize-slp -faddrsig -o reduced.o -x ir reduced.ll
> 1.      Per-module optimization passes
> 2.      Running pass 'Function Pass Manager' on module 'reduced.ll'.
> 3.      Running pass 'Combine redundant instructions' on function '@n'
>  #0 0x00007fd9935ae474 llvm::sys::PrintStackTrace(llvm::raw_ostream&)
> /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:532:13
>
>  #1 0x00007fd9935ae474 PrintStackTraceSignalHandler(void*)
> /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:592:3
>
>  #2 0x00007fd9935ac34e llvm::sys::RunSignalHandlers()
> /src/llvm-project/llvm/lib/Support/Signals.cpp:69:18
>
>
>  #3 0x00007fd9935ae728 SignalHandler(int)
> /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:384:1
>  #4 0x00007fd992fa73a0 __restore_rt
> (/lib/x86_64-linux-gnu/libpthread.so.0+0x123a0)
>  #5 0x00007fd993a9c4cc llvm::ConstantInt::classof(llvm::Value const*)
> /src/llvm-project/llvm/include/llvm/IR/Constants.h:256:28
>
>  #6 0x00007fd993a9c4cc llvm::isa_impl<llvm::ConstantInt, llvm::Value,
> void>::doit(llvm::Value const&)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:58:12
>
>  #7 0x00007fd993a9c4cc llvm::isa_impl_cl<llvm::ConstantInt, llvm::Value
> const*>::doit(llvm::Value const*)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:106:12
>
>  #8 0x00007fd993a9c4cc llvm::isa_impl_wrap<llvm::ConstantInt, llvm::Value
> const*, llvm::Value const*>::doit(llvm::Value const* const&)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:132:12
>  #9 0x00007fd993a9c4cc llvm::isa_impl_wrap<llvm::ConstantInt, llvm::Value*
> const, llvm::Value const*>::doit(llvm::Value* const&)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:122:12
>
> #10 0x00007fd993a9c4cc bool llvm::isa<llvm::ConstantInt,
> llvm::Value*>(llvm::Value* const&)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:142:10
>
> #11 0x00007fd993a9c4cc llvm::cast_retty<llvm::ConstantInt,
> llvm::Value*>::ret_type llvm::dyn_cast<llvm::ConstantInt,
> llvm::Value>(llvm::Value*)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:343:10
> #12 0x00007fd993a9c4cc llvm::InstCombiner::foldOrOfICmps(llvm::ICmpInst*,
> llvm::ICmpInst*, llvm::Instruction&)
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp:2148:23
>
> #13 0x00007fd993aa260e llvm::InstCombiner::visitOr(llvm::BinaryOperator&)
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp:2592:18
>
> #14 0x00007fd993a7f0d0 llvm::InstCombiner::run()
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3312:22
>
>
> #15 0x00007fd993a8013f combineInstructionsOverFunction(llvm::Function&,
> llvm::InstCombineWorklist&, llvm::AAResults*, llvm::AssumptionCache&,
> llvm::TargetLibraryInfo&, llvm::DominatorTree&,
> llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*,
> llvm::ProfileSummaryInfo*, bool, llvm::LoopInfo*)
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3546:13
>
> #16 0x00007fd993a81077
> llvm::InstructionCombiningPass::runOnFunction(llvm::Function&)
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3620:10
>
> #17 0x00007fd993ef1816 llvm::FPPassManager::runOnFunction(llvm::Function&)
> /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1648:27
>
> #18 0x00007fd993ef1ad3 llvm::FPPassManager::runOnModule(llvm::Module&)
> /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1685:13
>
> #19 0x00007fd993ef2128 (anonymous
> namespace)::MPPassManager::runOnModule(llvm::Module&)
> /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1750:27
>
> #20 0x00007fd993ef2128 llvm::legacy::PassManagerImpl::run(llvm::Module&)
> /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1863:44
>
> #21 0x00007fd992767ac2 (anonymous
> namespace)::EmitAssemblyHelper::EmitAssembly(clang::BackendAction,
> std::unique_ptr<llvm::raw_pwrite_stream,
> std::default_delete<llvm::raw_pwrite_stream> >)
> /src/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:909:3
> #22 0x00007fd992767ac2 clang::EmitBackendOutput(clang::DiagnosticsEngine&,
> clang::HeaderSearchOptions const&, clang::CodeGenOptions const&,
> clang::TargetOptions const&, clang::LangOptions const&, llvm::DataLayout
> const&, llvm::Module*, clang::BackendAction,
> std::unique_ptr<llvm::raw_pwrite_stream,
> std::default_delete<llvm::raw_pwrite_stream> >)
> /src/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1533:15
> #23 0x00007fd9929c4e4d std::unique_ptr<llvm::raw_pwrite_stream,
> std::default_delete<llvm::raw_pwrite_stream> >::~unique_ptr()
> /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/unique_ptr.h:273:6
>
> #24 0x00007fd9929c4e4d clang::CodeGenAction::ExecuteAction()
> /src/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:1080:5
>
> #25 0x00007fd9923ee4a9 clang::FrontendAction::Execute()
> /src/llvm-project/clang/lib/Frontend/FrontendAction.cpp:939:10
>
>
> #26 0x00007fd99238de50 llvm::Error::getPtr() const
> /src/llvm-project/llvm/include/llvm/Support/Error.h:273:42
>
>
> #27 0x00007fd99238de50 llvm::Error::operator bool()
> /src/llvm-project/llvm/include/llvm/Support/Error.h:236:16
>
>
> #28 0x00007fd99238de50
> clang::CompilerInstance::ExecuteAction(clang::FrontendAction&)
> /src/llvm-project/clang/lib/Frontend/CompilerInstance.cpp:957:23
>
> #29 0x00007fd9922e952c
> clang::ExecuteCompilerInvocation(clang::CompilerInstance*)
> /src/llvm-project/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:290:25
>
> #30 0x0000000000213bfd cc1_main(llvm::ArrayRef<char const*>, char const*,
> void*) /src/llvm-project/clang/tools/driver/cc1_main.cpp:250:15
>
> #31 0x0000000000211e3f ExecuteCC1Tool(llvm::ArrayRef<char const*>,
> llvm::StringRef) /src/llvm-project/clang/tools/driver/driver.cpp:309:12
>
> #32 0x0000000000211e3f main
> /src/llvm-project/clang/tools/driver/driver.cpp:382:12
>
> On Mon, Sep 23, 2019 at 9:23 AM Alexey Bataev via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: abataev
>> Date: Mon Sep 23 09:25:03 2019
>> New Revision: 372626
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=372626&view=rev
>> Log:
>> [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L)
>> && "SCEVAddRecExpr operand is not loop-invariant!")
>>
>> Summary:
>> Initially SLP vectorizer replaced all going-to-be-vectorized
>> instructions with Undef values. It may break ScalarEvolution and may
>> cause a crash.
>> Reworked SLP vectorizer so that it does not replace vectorized
>> instructions by UndefValue anymore. Instead vectorized instructions are
>> marked for deletion inside the BoUpSLP class and deleted upon class
>> destruction.
>>
>> Reviewers: mzolotukhin, mkuper, hfinkel, RKSimon, davide, spatel
>>
>> Subscribers: RKSimon, Gerolf, anemet, hans, majnemer, llvm-commits, sanjoy
>>
>> Differential Revision: https://reviews.llvm.org/D29641
>>
>> Added:
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll
>> Modified:
>>     llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
>>     llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>>     llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>>     llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll
>>
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>>
>> Modified: llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
>> (original)
>> +++ llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h Mon Sep
>> 23 09:25:03 2019
>> @@ -24,7 +24,6 @@
>>  #include "llvm/ADT/SmallVector.h"
>>  #include "llvm/Analysis/AliasAnalysis.h"
>>  #include "llvm/IR/PassManager.h"
>> -#include "llvm/IR/ValueHandle.h"
>>
>>  namespace llvm {
>>
>> @@ -60,8 +59,8 @@ extern cl::opt<bool> RunSLPVectorization
>>  struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
>>    using StoreList = SmallVector<StoreInst *, 8>;
>>    using StoreListMap = MapVector<Value *, StoreList>;
>> -  using WeakTrackingVHList = SmallVector<WeakTrackingVH, 8>;
>> -  using WeakTrackingVHListMap = MapVector<Value *, WeakTrackingVHList>;
>> +  using GEPList = SmallVector<GetElementPtrInst *, 8>;
>> +  using GEPListMap = MapVector<Value *, GEPList>;
>>
>>    ScalarEvolution *SE = nullptr;
>>    TargetTransformInfo *TTI = nullptr;
>> @@ -131,7 +130,7 @@ private:
>>
>>    /// Tries to vectorize constructs started from CmpInst,
>> InsertValueInst or
>>    /// InsertElementInst instructions.
>> -  bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
>> +  bool vectorizeSimpleInstructions(SmallVectorImpl<Instruction *>
>> &Instructions,
>>                                     BasicBlock *BB,
>> slpvectorizer::BoUpSLP &R);
>>
>>    /// Scan the basic block and look for patterns that are likely to start
>> @@ -147,7 +146,7 @@ private:
>>    StoreListMap Stores;
>>
>>    /// The getelementptr instructions in a basic block organized by base
>> pointer.
>> -  WeakTrackingVHListMap GEPs;
>> +  GEPListMap GEPs;
>>  };
>>
>>  } // end namespace llvm
>>
>> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Mon Sep 23
>> 09:25:03 2019
>> @@ -1121,6 +1121,14 @@ public:
>>  #endif
>>    };
>>
>> +  /// Checks if the instruction is marked for deletion.
>> +  bool isDeleted(Instruction *I) const { return
>> DeletedInstructions.count(I); }
>> +
>> +  /// Marks values for later deletion.
>> +  void eraseInstructions(ArrayRef<Value *> AV);
>> +
>> +  ~BoUpSLP();
>> +
>>  private:
>>    /// Checks if all users of \p I are the part of the vectorization tree.
>>    bool areAllUsersVectorized(Instruction *I) const;
>> @@ -1491,14 +1499,12 @@ private:
>>    /// AliasCache, which can happen if a new instruction is allocated at
>> the
>>    /// same address as a previously deleted instruction.
>>    void eraseInstruction(Instruction *I) {
>> -    I->removeFromParent();
>> -    I->dropAllReferences();
>> -    DeletedInstructions.emplace_back(I);
>> +    DeletedInstructions.insert(I);
>>    }
>>
>>    /// Temporary store for deleted instructions. Instructions will be
>> deleted
>>    /// eventually when the BoUpSLP is destructed.
>> -  SmallVector<unique_value, 8> DeletedInstructions;
>> +  SmallPtrSet<Instruction *, 8> DeletedInstructions;
>>
>>    /// A list of values that need to extracted out of the tree.
>>    /// This list holds pairs of (Internal Scalar : External User).
>> External User
>> @@ -2055,6 +2061,22 @@ template <> struct DOTGraphTraits<BoUpSL
>>
>>  } // end namespace llvm
>>
>> +BoUpSLP::~BoUpSLP() {
>> +  for (auto *I : DeletedInstructions)
>> +    I->dropAllReferences();
>> +  for (auto *I : DeletedInstructions) {
>> +    assert(I->use_empty() && "trying to erase instruction with users.");
>> +    I->eraseFromParent();
>> +  }
>> +}
>> +
>> +void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
>> +  for (auto *V : AV) {
>> +    if (auto *I = dyn_cast<Instruction>(V))
>> +      eraseInstruction(I);
>> +  };
>> +}
>> +
>>  void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
>>                          ArrayRef<Value *> UserIgnoreLst) {
>>    ExtraValueToDebugLocsMap ExternallyUsedValues;
>> @@ -3541,7 +3563,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *>
>>    // Generate the 'InsertElement' instruction.
>>    for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
>>      Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
>> -    if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
>> +    if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
>>        GatherSeq.insert(Insrt);
>>        CSEBlocks.insert(Insrt->getParent());
>>
>> @@ -4290,20 +4312,18 @@ BoUpSLP::vectorizeTree(ExtraValueToDebug
>>      for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
>>        Value *Scalar = Entry->Scalars[Lane];
>>
>> +#ifndef NDEBUG
>>        Type *Ty = Scalar->getType();
>>        if (!Ty->isVoidTy()) {
>> -#ifndef NDEBUG
>>          for (User *U : Scalar->users()) {
>>            LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
>>
>> -          // It is legal to replace users in the ignorelist by undef.
>> +          // It is legal to delete users in the ignorelist.
>>            assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
>> -                 "Replacing out-of-tree value with undef");
>> +                 "Deleting out-of-tree value");
>>          }
>> -#endif
>> -        Value *Undef = UndefValue::get(Ty);
>> -        Scalar->replaceAllUsesWith(Undef);
>>        }
>> +#endif
>>        LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
>>        eraseInstruction(cast<Instruction>(Scalar));
>>      }
>> @@ -4319,7 +4339,7 @@ void BoUpSLP::optimizeGatherSequence() {
>>                      << " gather sequences instructions.\n");
>>    // LICM InsertElementInst sequences.
>>    for (Instruction *I : GatherSeq) {
>> -    if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
>> +    if (isDeleted(I))
>>        continue;
>>
>>      // Check if this block is inside a loop.
>> @@ -4373,6 +4393,8 @@ void BoUpSLP::optimizeGatherSequence() {
>>      // For all instructions in blocks containing gather sequences:
>>      for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;)
>> {
>>        Instruction *In = &*it++;
>> +      if (isDeleted(In))
>> +        continue;
>>        if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
>>          continue;
>>
>> @@ -5255,19 +5277,6 @@ bool SLPVectorizerPass::runImpl(Function
>>    return Changed;
>>  }
>>
>> -/// Check that the Values in the slice in VL array are still existent in
>> -/// the WeakTrackingVH array.
>> -/// Vectorization of part of the VL array may cause later values in the
>> VL array
>> -/// to become invalid. We track when this has happened in the
>> WeakTrackingVH
>> -/// array.
>> -static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
>> -                               ArrayRef<WeakTrackingVH> VH, unsigned
>> SliceBegin,
>> -                               unsigned SliceSize) {
>> -  VL = VL.slice(SliceBegin, SliceSize);
>> -  VH = VH.slice(SliceBegin, SliceSize);
>> -  return !std::equal(VL.begin(), VL.end(), VH.begin());
>> -}
>> -
>>  bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
>> BoUpSLP &R,
>>                                              unsigned VecRegSize) {
>>    const unsigned ChainLen = Chain.size();
>> @@ -5279,20 +5288,20 @@ bool SLPVectorizerPass::vectorizeStoreCh
>>    if (!isPowerOf2_32(Sz) || VF < 2)
>>      return false;
>>
>> -  // Keep track of values that were deleted by vectorizing in the loop
>> below.
>> -  const SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(),
>> Chain.end());
>> -
>>    bool Changed = false;
>>    // Look for profitable vectorizable trees at all offsets, starting at
>> zero.
>>    for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) {
>>
>> +    ArrayRef<Value *> Operands = Chain.slice(i, VF);
>>      // Check that a previous iteration of this loop did not delete the
>> Value.
>> -    if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
>> +    if (llvm::any_of(Operands, [&R](Value *V) {
>> +          auto *I = dyn_cast<Instruction>(V);
>> +          return I && R.isDeleted(I);
>> +        }))
>>        continue;
>>
>>      LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset "
>> << i
>>                        << "\n");
>> -    ArrayRef<Value *> Operands = Chain.slice(i, VF);
>>
>>      R.buildTree(Operands);
>>      if (R.isTreeTinyAndNotFullyVectorizable())
>> @@ -5484,9 +5493,6 @@ bool SLPVectorizerPass::tryToVectorizeLi
>>    bool CandidateFound = false;
>>    int MinCost = SLPCostThreshold;
>>
>> -  // Keep track of values that were deleted by vectorizing in the loop
>> below.
>> -  SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
>> -
>>    unsigned NextInst = 0, MaxInst = VL.size();
>>    for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /=
>> 2) {
>>      // No actual vectorization should happen, if number of parts is the
>> same as
>> @@ -5506,13 +5512,16 @@ bool SLPVectorizerPass::tryToVectorizeLi
>>        if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
>>          break;
>>
>> +      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
>>        // Check that a previous iteration of this loop did not delete the
>> Value.
>> -      if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
>> +      if (llvm::any_of(Ops, [&R](Value *V) {
>> +            auto *I = dyn_cast<Instruction>(V);
>> +            return I && R.isDeleted(I);
>> +          }))
>>          continue;
>>
>>        LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations
>> "
>>                          << "\n");
>> -      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
>>
>>        R.buildTree(Ops);
>>        Optional<ArrayRef<unsigned>> Order = R.bestOrder();
>> @@ -5733,23 +5742,23 @@ class HorizontalReduction {
>>        case RK_Min:
>>          Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS,
>> RHS)
>>                                            : Builder.CreateFCmpOLT(LHS,
>> RHS);
>> -        break;
>> +        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>>        case RK_Max:
>>          Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS,
>> RHS)
>>                                            : Builder.CreateFCmpOGT(LHS,
>> RHS);
>> -        break;
>> +        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>>        case RK_UMin:
>>          assert(Opcode == Instruction::ICmp && "Expected integer types.");
>>          Cmp = Builder.CreateICmpULT(LHS, RHS);
>> -        break;
>> +        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>>        case RK_UMax:
>>          assert(Opcode == Instruction::ICmp && "Expected integer types.");
>>          Cmp = Builder.CreateICmpUGT(LHS, RHS);
>> -        break;
>> +        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>>        case RK_None:
>> -        llvm_unreachable("Unknown reduction operation.");
>> +        break;
>>        }
>> -      return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>> +      llvm_unreachable("Unknown reduction operation.");
>>      }
>>
>>    public:
>> @@ -6429,6 +6438,9 @@ public:
>>        }
>>        // Update users.
>>        ReductionRoot->replaceAllUsesWith(VectorizedTree);
>> +      // Mark all scalar reduction ops for deletion, they are replaced
>> by the
>> +      // vector reductions.
>> +      V.eraseInstructions(IgnoreList);
>>      }
>>      return VectorizedTree != nullptr;
>>    }
>> @@ -6683,18 +6695,13 @@ static bool tryToVectorizeHorReductionOr
>>    // horizontal reduction.
>>    // Interrupt the process if the Root instruction itself was vectorized
>> or all
>>    // sub-trees not higher that RecursionMaxDepth were
>> analyzed/vectorized.
>> -  SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root,
>> 0});
>> +  SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
>>    SmallPtrSet<Value *, 8> VisitedInstrs;
>>    bool Res = false;
>>    while (!Stack.empty()) {
>> -    Value *V;
>> +    Instruction *Inst;
>>      unsigned Level;
>> -    std::tie(V, Level) = Stack.pop_back_val();
>> -    if (!V)
>> -      continue;
>> -    auto *Inst = dyn_cast<Instruction>(V);
>> -    if (!Inst)
>> -      continue;
>> +    std::tie(Inst, Level) = Stack.pop_back_val();
>>      auto *BI = dyn_cast<BinaryOperator>(Inst);
>>      auto *SI = dyn_cast<SelectInst>(Inst);
>>      if (BI || SI) {
>> @@ -6735,8 +6742,8 @@ static bool tryToVectorizeHorReductionOr
>>        for (auto *Op : Inst->operand_values())
>>          if (VisitedInstrs.insert(Op).second)
>>            if (auto *I = dyn_cast<Instruction>(Op))
>> -            if (!isa<PHINode>(I) && I->getParent() == BB)
>> -              Stack.emplace_back(Op, Level);
>> +            if (!isa<PHINode>(I) && !R.isDeleted(I) && I->getParent() ==
>> BB)
>> +              Stack.emplace_back(I, Level);
>>    }
>>    return Res;
>>  }
>> @@ -6805,11 +6812,10 @@ bool SLPVectorizerPass::vectorizeCmpInst
>>  }
>>
>>  bool SLPVectorizerPass::vectorizeSimpleInstructions(
>> -    SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
>> +    SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB,
>> BoUpSLP &R) {
>>    bool OpsChanged = false;
>> -  for (auto &VH : reverse(Instructions)) {
>> -    auto *I = dyn_cast_or_null<Instruction>(VH);
>> -    if (!I)
>> +  for (auto *I : reverse(Instructions)) {
>> +    if (R.isDeleted(I))
>>        continue;
>>      if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
>>        OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
>> @@ -6838,7 +6844,7 @@ bool SLPVectorizerPass::vectorizeChainsI
>>        if (!P)
>>          break;
>>
>> -      if (!VisitedInstrs.count(P))
>> +      if (!VisitedInstrs.count(P) && !R.isDeleted(P))
>>          Incoming.push_back(P);
>>      }
>>
>> @@ -6882,9 +6888,12 @@ bool SLPVectorizerPass::vectorizeChainsI
>>
>>    VisitedInstrs.clear();
>>
>> -  SmallVector<WeakVH, 8> PostProcessInstructions;
>> +  SmallVector<Instruction *, 8> PostProcessInstructions;
>>    SmallDenseSet<Instruction *, 4> KeyNodes;
>>    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;
>> ++it) {
>> +    // Skip instructions marked for the deletion.
>> +    if (R.isDeleted(&*it))
>> +      continue;
>>      // We may go through BB multiple times so skip the one we have
>> checked.
>>      if (!VisitedInstrs.insert(&*it).second) {
>>        if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
>> @@ -6977,10 +6986,10 @@ bool SLPVectorizerPass::vectorizeGEPIndi
>>        SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
>>
>>        // Some of the candidates may have already been vectorized after we
>> -      // initially collected them. If so, the WeakTrackingVHs will have
>> -      // nullified the
>> -      // values, so remove them from the set of candidates.
>> -      Candidates.remove(nullptr);
>> +      // initially collected them. If so, they are marked as deleted, so
>> remove
>> +      // them from the set of candidates.
>> +      Candidates.remove_if(
>> +          [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
>>
>>        // Remove from the set of candidates all pairs of getelementptrs
>> with
>>        // constant differences. Such getelementptrs are likely not good
>> @@ -6988,18 +6997,18 @@ bool SLPVectorizerPass::vectorizeGEPIndi
>>        // computed from the other. We also ensure all candidate
>> getelementptr
>>        // indices are unique.
>>        for (int I = 0, E = GEPList.size(); I < E && Candidates.size() >
>> 1; ++I) {
>> -        auto *GEPI = cast<GetElementPtrInst>(GEPList[I]);
>> +        auto *GEPI = GEPList[I];
>>          if (!Candidates.count(GEPI))
>>            continue;
>>          auto *SCEVI = SE->getSCEV(GEPList[I]);
>>          for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
>> -          auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]);
>> +          auto *GEPJ = GEPList[J];
>>            auto *SCEVJ = SE->getSCEV(GEPList[J]);
>>            if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
>> -            Candidates.remove(GEPList[I]);
>> -            Candidates.remove(GEPList[J]);
>> +            Candidates.remove(GEPI);
>> +            Candidates.remove(GEPJ);
>>            } else if (GEPI->idx_begin()->get() ==
>> GEPJ->idx_begin()->get()) {
>> -            Candidates.remove(GEPList[J]);
>> +            Candidates.remove(GEPJ);
>>            }
>>          }
>>        }
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -17,16 +17,8 @@ define void @PR28330(i32 %n) {
>>  ; DEFAULT:       for.body:
>>  ; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]],
>> [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
>>  ; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
>> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
>> -80, i32 -80>
>> -; DEFAULT-NEXT:    [[P20:%.*]] = add i32 [[P17]], undef
>> -; DEFAULT-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
>> -; DEFAULT-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
>> -; DEFAULT-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
>> -; DEFAULT-NEXT:    [[P28:%.*]] = add i32 [[P26]], undef
>> -; DEFAULT-NEXT:    [[P30:%.*]] = add i32 [[P28]], undef
>> -; DEFAULT-NEXT:    [[P32:%.*]] = add i32 [[P30]], undef
>>  ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
>>  ; DEFAULT-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
>> -; DEFAULT-NEXT:    [[P34:%.*]] = add i32 [[P32]], undef
>>  ; DEFAULT-NEXT:    br label [[FOR_BODY]]
>>  ;
>>  ; GATHER-LABEL: @PR28330(
>> @@ -36,37 +28,30 @@ define void @PR28330(i32 %n) {
>>  ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
>>  ; GATHER:       for.body:
>>  ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]]
>> ], [ 0, [[ENTRY:%.*]] ]
>> -; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
>> -; GATHER-NEXT:    [[TMP3:%.*]] = insertelement <8 x i1> undef, i1
>> [[TMP2]], i32 0
>> -; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
>> -; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1
>> [[TMP4]], i32 1
>> -; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
>> -; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1
>> [[TMP6]], i32 2
>> -; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
>> -; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1
>> [[TMP8]], i32 3
>> -; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
>> -; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1
>> [[TMP10]], i32 4
>> -; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
>> -; GATHER-NEXT:    [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1
>> [[TMP12]], i32 5
>> -; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
>> -; GATHER-NEXT:    [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1
>> [[TMP14]], i32 6
>> -; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
>> -; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1
>> [[TMP16]], i32 7
>> +; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
>> +; GATHER-NEXT:    [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
>> +; GATHER-NEXT:    [[TMP4:%.*]] = insertelement <8 x i1> undef, i1
>> [[TMP3]], i32 0
>> +; GATHER-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
>> +; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1
>> [[TMP5]], i32 1
>> +; GATHER-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
>> +; GATHER-NEXT:    [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1
>> [[TMP7]], i32 2
>> +; GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
>> +; GATHER-NEXT:    [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1
>> [[TMP9]], i32 3
>> +; GATHER-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
>> +; GATHER-NEXT:    [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1
>> [[TMP11]], i32 4
>> +; GATHER-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
>> +; GATHER-NEXT:    [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1
>> [[TMP13]], i32 5
>> +; GATHER-NEXT:    [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
>> +; GATHER-NEXT:    [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1
>> [[TMP15]], i32 6
>> +; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1
>> [[TMP2]], i32 7
>>  ; GATHER-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
>> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
>> -80, i32 -80>
>>  ; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 0
>> -; GATHER-NEXT:    [[P20:%.*]] = add i32 [[P17]], [[TMP19]]
>>  ; GATHER-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 1
>> -; GATHER-NEXT:    [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
>>  ; GATHER-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 2
>> -; GATHER-NEXT:    [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
>>  ; GATHER-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 3
>> -; GATHER-NEXT:    [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
>>  ; GATHER-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 4
>> -; GATHER-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
>>  ; GATHER-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 5
>> -; GATHER-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
>>  ; GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 6
>> -; GATHER-NEXT:    [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
>>  ; GATHER-NEXT:    [[TMP26:%.*]] = insertelement <8 x i32> undef, i32
>> [[TMP19]], i32 0
>>  ; GATHER-NEXT:    [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32
>> [[TMP20]], i32 1
>>  ; GATHER-NEXT:    [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32
>> [[TMP21]], i32 2
>> @@ -78,7 +63,6 @@ define void @PR28330(i32 %n) {
>>  ; GATHER-NEXT:    [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32
>> [[TMP33]], i32 7
>>  ; GATHER-NEXT:    [[TMP35:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
>>  ; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]]
>> -; GATHER-NEXT:    [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
>>  ; GATHER-NEXT:    br label [[FOR_BODY]]
>>  ;
>>  ; MAX-COST-LABEL: @PR28330(
>> @@ -169,16 +153,8 @@ define void @PR32038(i32 %n) {
>>  ; DEFAULT:       for.body:
>>  ; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]],
>> [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
>>  ; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
>> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
>> -80, i32 -80>
>> -; DEFAULT-NEXT:    [[P20:%.*]] = add i32 -5, undef
>> -; DEFAULT-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
>> -; DEFAULT-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
>> -; DEFAULT-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
>> -; DEFAULT-NEXT:    [[P28:%.*]] = add i32 [[P26]], undef
>> -; DEFAULT-NEXT:    [[P30:%.*]] = add i32 [[P28]], undef
>> -; DEFAULT-NEXT:    [[P32:%.*]] = add i32 [[P30]], undef
>>  ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
>>  ; DEFAULT-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], -5
>> -; DEFAULT-NEXT:    [[P34:%.*]] = add i32 [[P32]], undef
>>  ; DEFAULT-NEXT:    br label [[FOR_BODY]]
>>  ;
>>  ; GATHER-LABEL: @PR32038(
>> @@ -188,37 +164,30 @@ define void @PR32038(i32 %n) {
>>  ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
>>  ; GATHER:       for.body:
>>  ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]]
>> ], [ 0, [[ENTRY:%.*]] ]
>> -; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
>> -; GATHER-NEXT:    [[TMP3:%.*]] = insertelement <8 x i1> undef, i1
>> [[TMP2]], i32 0
>> -; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
>> -; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1
>> [[TMP4]], i32 1
>> -; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
>> -; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1
>> [[TMP6]], i32 2
>> -; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
>> -; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1
>> [[TMP8]], i32 3
>> -; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
>> -; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1
>> [[TMP10]], i32 4
>> -; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
>> -; GATHER-NEXT:    [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1
>> [[TMP12]], i32 5
>> -; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
>> -; GATHER-NEXT:    [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1
>> [[TMP14]], i32 6
>> -; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
>> -; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1
>> [[TMP16]], i32 7
>> +; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
>> +; GATHER-NEXT:    [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
>> +; GATHER-NEXT:    [[TMP4:%.*]] = insertelement <8 x i1> undef, i1
>> [[TMP3]], i32 0
>> +; GATHER-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
>> +; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1
>> [[TMP5]], i32 1
>> +; GATHER-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
>> +; GATHER-NEXT:    [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1
>> [[TMP7]], i32 2
>> +; GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
>> +; GATHER-NEXT:    [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1
>> [[TMP9]], i32 3
>> +; GATHER-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
>> +; GATHER-NEXT:    [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1
>> [[TMP11]], i32 4
>> +; GATHER-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
>> +; GATHER-NEXT:    [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1
>> [[TMP13]], i32 5
>> +; GATHER-NEXT:    [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
>> +; GATHER-NEXT:    [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1
>> [[TMP15]], i32 6
>> +; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1
>> [[TMP2]], i32 7
>>  ; GATHER-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
>> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
>> -80, i32 -80>
>>  ; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 0
>> -; GATHER-NEXT:    [[P20:%.*]] = add i32 -5, [[TMP19]]
>>  ; GATHER-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 1
>> -; GATHER-NEXT:    [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
>>  ; GATHER-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 2
>> -; GATHER-NEXT:    [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
>>  ; GATHER-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 3
>> -; GATHER-NEXT:    [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
>>  ; GATHER-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 4
>> -; GATHER-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
>>  ; GATHER-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 5
>> -; GATHER-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
>>  ; GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 6
>> -; GATHER-NEXT:    [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
>>  ; GATHER-NEXT:    [[TMP26:%.*]] = insertelement <8 x i32> undef, i32
>> [[TMP19]], i32 0
>>  ; GATHER-NEXT:    [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32
>> [[TMP20]], i32 1
>>  ; GATHER-NEXT:    [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32
>> [[TMP21]], i32 2
>> @@ -230,7 +199,6 @@ define void @PR32038(i32 %n) {
>>  ; GATHER-NEXT:    [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32
>> [[TMP33]], i32 7
>>  ; GATHER-NEXT:    [[TMP35:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
>>  ; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP35]], -5
>> -; GATHER-NEXT:    [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
>>  ; GATHER-NEXT:    br label [[FOR_BODY]]
>>  ;
>>  ; MAX-COST-LABEL: @PR32038(
>> @@ -259,18 +227,12 @@ define void @PR32038(i32 %n) {
>>  ; MAX-COST-NEXT:    [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1
>> [[P5]], i32 2
>>  ; MAX-COST-NEXT:    [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1
>> [[P7]], i32 3
>>  ; MAX-COST-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32
>> -80, i32 -80>
>> -; MAX-COST-NEXT:    [[P20:%.*]] = add i32 -5, undef
>> -; MAX-COST-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
>> -; MAX-COST-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
>> -; MAX-COST-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
>>  ; MAX-COST-NEXT:    [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
>> -; MAX-COST-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[P27]]
>>  ; MAX-COST-NEXT:    [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
>>  ; MAX-COST-NEXT:    [[TMP9:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
>>  ; MAX-COST-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]]
>>  ; MAX-COST-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]]
>>  ; MAX-COST-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5
>> -; MAX-COST-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[P29]]
>>  ; MAX-COST-NEXT:    [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
>>  ; MAX-COST-NEXT:    [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
>>  ; MAX-COST-NEXT:    [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -46,12 +46,8 @@ define i32 @test_select(i32* noalias noc
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]],
>> zeroinitializer
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer,
>> [[TMP4]]
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32>
>> [[TMP6]], <4 x i32> [[TMP4]]
>> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_026]]
>> -; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD]], undef
>> -; CHECK-NEXT:    [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
>>  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
>> -; CHECK-NEXT:    [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef
>>  ; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32*
>> [[P1_023]], i64 [[IDX_EXT]]
>>  ; CHECK-NEXT:    [[ADD_PTR29]] = getelementptr inbounds i32, i32*
>> [[P2_024]], i64 [[IDX_EXT]]
>>  ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_025]], 1
>> @@ -173,12 +169,8 @@ define i32 @reduction_with_br(i32* noali
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>*
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
>> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_020]]
>> -; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD]], undef
>> -; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
>>  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
>> -; CHECK-NEXT:    [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef
>>  ; CHECK-NEXT:    [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
>>  ; CHECK-NEXT:    br i1 [[CMP14]], label [[IF_END]], label
>> [[FOR_END_LOOPEXIT:%.*]]
>>  ; CHECK:       if.end:
>> @@ -293,16 +285,8 @@ define i32 @test_unrolled_select(i8* noa
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]],
>> zeroinitializer
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer,
>> [[TMP6]]
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32>
>> [[TMP8]], <8 x i32> [[TMP6]]
>> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_047]]
>> -; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[ADD]], undef
>> -; CHECK-NEXT:    [[ADD27:%.*]] = add nsw i32 [[ADD16]], undef
>> -; CHECK-NEXT:    [[ADD38:%.*]] = add nsw i32 [[ADD27]], undef
>> -; CHECK-NEXT:    [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef
>> -; CHECK-NEXT:    [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef
>> -; CHECK-NEXT:    [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef
>>  ; CHECK-NEXT:    [[TMP10:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
>>  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
>> -; CHECK-NEXT:    [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef
>>  ; CHECK-NEXT:    [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
>>  ; CHECK-NEXT:    br i1 [[CMP83]], label [[IF_END_86]], label
>> [[FOR_END_LOOPEXIT:%.*]]
>>  ; CHECK:       if.end.86:
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -13,11 +13,11 @@ define void @patatino(i64 %n, i64 %i, %s
>>  ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[I:%.*]],
>> metadata !19, metadata !DIExpression()), !dbg !24
>>  ; CHECK-NEXT:    call void @llvm.dbg.value(metadata %struct.S*
>> [[P:%.*]], metadata !20, metadata !DIExpression()), !dbg !25
>>  ; CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]],
>> %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26
>> -; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata
>> !21, metadata !DIExpression()), !dbg !27
>> +; CHECK-NEXT:    call void @llvm.dbg.value(metadata !2, metadata !21,
>> metadata !DIExpression()), !dbg !27
>>  ; CHECK-NEXT:    [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]],
>> %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg
>> !26
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]],
>> align 8, !dbg !26, !tbaa !29
>> -; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata
>> !22, metadata !DIExpression()), !dbg !33
>> +; CHECK-NEXT:    call void @llvm.dbg.value(metadata !2, metadata !22,
>> metadata !DIExpression()), !dbg !33
>>  ; CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]],
>> %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34
>>  ; CHECK-NEXT:    [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]],
>> %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg
>> !36
>>
>> Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll?rev=372626&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll (added)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -0,0 +1,153 @@
>> +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
>> +; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s |
>> FileCheck %s
>> +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
>> +
>> +@shift = common local_unnamed_addr global [10 x i32] zeroinitializer,
>> align 4
>> +@data = common local_unnamed_addr global [10 x i8*] zeroinitializer,
>> align 4
>> +
>> +define void @flat(i32 %intensity) {
>> +; CHECK-LABEL: @flat(
>> +; CHECK-NEXT:  entry:
>> +; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds
>> ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
>> +; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds
>> ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
>> +; CHECK-NEXT:    [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds
>> ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
>> +; CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds
>> ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
>> +; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 1, [[TMP0]]
>> +; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8*
>> [[TMP2]], i32 [[SHR]]
>> +; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 1, [[TMP1]]
>> +; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8*
>> [[TMP3]], i32 [[SHR1]]
>> +; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
>> +; CHECK:       for.cond.cleanup:
>> +; CHECK-NEXT:    ret void
>> +; CHECK:       for.body:
>> +; CHECK-NEXT:    [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]]
>> ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
>> +; CHECK-NEXT:    [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [
>> [[INC_1:%.*]], [[FOR_BODY]] ]
>> +; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
>> +; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
>> +; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[CONV]], -128
>> +; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
>> +; CHECK-NEXT:    [[CONV3:%.*]] = zext i8 [[TMP5]] to i32
>> +; CHECK-NEXT:    [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128
>> +; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1
>> +; CHECK-NEXT:    [[SUB7:%.*]] = sub nsw i32 128, [[CONV]]
>> +; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP5]], i32 [[SUB]], i32
>> [[SUB7]]
>> +; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[SUB4]], -1
>> +; CHECK-NEXT:    [[SUB12:%.*]] = sub nsw i32 128, [[CONV3]]
>> +; CHECK-NEXT:    [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32
>> [[SUB12]]
>> +; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]]
>> +; CHECK-NEXT:    [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]]
>> +; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8*
>> [[D1_DATA_046]], i32 [[IDX_NEG]]
>> +; CHECK-NEXT:    [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1
>> +; CHECK-NEXT:    [[CONV15:%.*]] = zext i8 [[TMP6]] to i32
>> +; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[CONV15]],
>> [[INTENSITY:%.*]]
>> +; CHECK-NEXT:    [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8
>> +; CHECK-NEXT:    store i8 [[CONV17]], i8* [[ADD_PTR]], align 1
>> +; CHECK-NEXT:    [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8*
>> [[D1_DATA_046]], i32 [[ADD]]
>> +; CHECK-NEXT:    [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1
>> +; CHECK-NEXT:    [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0
>> +; CHECK-NEXT:    [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8
>> +; CHECK-NEXT:    store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1
>> +; CHECK-NEXT:    [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8*
>> [[D1_DATA_046]], i32 [[TMP1]]
>> +; CHECK-NEXT:    [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
>> +; CHECK-NEXT:    [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32
>> +; CHECK-NEXT:    [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128
>> +; CHECK-NEXT:    [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
>> +; CHECK-NEXT:    [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32
>> +; CHECK-NEXT:    [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128
>> +; CHECK-NEXT:    [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1
>> +; CHECK-NEXT:    [[SUB7_1:%.*]] = sub nsw i32 128, [[CONV_1]]
>> +; CHECK-NEXT:    [[COND_1:%.*]] = select i1 [[CMP5_1]], i32 [[SUB_1]],
>> i32 [[SUB7_1]]
>> +; CHECK-NEXT:    [[CMP8_1:%.*]] = icmp sgt i32 [[SUB4_1]], -1
>> +; CHECK-NEXT:    [[SUB12_1:%.*]] = sub nsw i32 128, [[CONV3_1]]
>> +; CHECK-NEXT:    [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32
>> [[SUB4_1]], i32 [[SUB12_1]]
>> +; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]]
>> +; CHECK-NEXT:    [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]]
>> +; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8*
>> [[ADD_PTR23]], i32 [[IDX_NEG_1]]
>> +; CHECK-NEXT:    [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1
>> +; CHECK-NEXT:    [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32
>> +; CHECK-NEXT:    [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]],
>> [[INTENSITY]]
>> +; CHECK-NEXT:    [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8
>> +; CHECK-NEXT:    store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1
>> +; CHECK-NEXT:    [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8*
>> [[ADD_PTR23]], i32 [[ADD_1]]
>> +; CHECK-NEXT:    [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1
>> +; CHECK-NEXT:    [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0
>> +; CHECK-NEXT:    [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8
>> +; CHECK-NEXT:    store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1
>> +; CHECK-NEXT:    [[ADD_PTR23_1]] = getelementptr inbounds i8, i8*
>> [[ADD_PTR23]], i32 [[TMP1]]
>> +; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[Y_045]], 2
>> +; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128
>> +; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]],
>> label [[FOR_BODY]]
>> +;
>> +entry:
>> +  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]*
>> @shift, i32 0, i32 0), align 4
>> +  %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]*
>> @shift, i32 0, i32 1), align 4
>> +  %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]*
>> @data, i32 0, i32 0), align 4
>> +  %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]*
>> @data, i32 0, i32 1), align 4
>> +  %shr = lshr i32 1, %0
>> +  %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr
>> +  %shr1 = lshr i32 1, %1
>> +  %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1
>> +  br label %for.body
>> +
>> +for.cond.cleanup:                                 ; preds = %for.body
>> +  ret void
>> +
>> +for.body:                                         ; preds = %for.body,
>> %entry
>> +  %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ]
>> +  %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ]
>> +  %4 = load i8, i8* %arrayidx, align 1
>> +  %conv = zext i8 %4 to i32
>> +  %sub = add nsw i32 %conv, -128
>> +  %5 = load i8, i8* %arrayidx2, align 1
>> +  %conv3 = zext i8 %5 to i32
>> +  %sub4 = add nsw i32 %conv3, -128
>> +  %cmp5 = icmp sgt i32 %sub, -1
>> +  %sub7 = sub nsw i32 128, %conv
>> +  %cond = select i1 %cmp5, i32 %sub, i32 %sub7
>> +  %cmp8 = icmp sgt i32 %sub4, -1
>> +  %sub12 = sub nsw i32 128, %conv3
>> +  %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12
>> +  %add = add nsw i32 %cond14, %cond
>> +  %idx.neg = sub nsw i32 0, %add
>> +  %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg
>> +  %6 = load i8, i8* %add.ptr, align 1
>> +  %conv15 = zext i8 %6 to i32
>> +  %add16 = add nsw i32 %conv15, %intensity
>> +  %conv17 = trunc i32 %add16 to i8
>> +  store i8 %conv17, i8* %add.ptr, align 1
>> +  %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add
>> +  %7 = load i8, i8* %add.ptr18, align 1
>> +  %not.tobool = icmp eq i8 %7, 0
>> +  %conv21 = zext i1 %not.tobool to i8
>> +  store i8 %conv21, i8* %add.ptr18, align 1
>> +  %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1
>> +  %8 = load i8, i8* %arrayidx, align 1
>> +  %conv.1 = zext i8 %8 to i32
>> +  %sub.1 = add nsw i32 %conv.1, -128
>> +  %9 = load i8, i8* %arrayidx2, align 1
>> +  %conv3.1 = zext i8 %9 to i32
>> +  %sub4.1 = add nsw i32 %conv3.1, -128
>> +  %cmp5.1 = icmp sgt i32 %sub.1, -1
>> +  %sub7.1 = sub nsw i32 128, %conv.1
>> +  %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1
>> +  %cmp8.1 = icmp sgt i32 %sub4.1, -1
>> +  %sub12.1 = sub nsw i32 128, %conv3.1
>> +  %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1
>> +  %add.1 = add nsw i32 %cond14.1, %cond.1
>> +  %idx.neg.1 = sub nsw i32 0, %add.1
>> +  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1
>> +  %10 = load i8, i8* %add.ptr.1, align 1
>> +  %conv15.1 = zext i8 %10 to i32
>> +  %add16.1 = add nsw i32 %conv15.1, %intensity
>> +  %conv17.1 = trunc i32 %add16.1 to i8
>> +  store i8 %conv17.1, i8* %add.ptr.1, align 1
>> +  %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1
>> +  %11 = load i8, i8* %add.ptr18.1, align 1
>> +  %not.tobool.1 = icmp eq i8 %11, 0
>> +  %conv21.1 = zext i1 %not.tobool.1 to i8
>> +  store i8 %conv21.1, i8* %add.ptr18.1, align 1
>> +  %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1
>> +  %inc.1 = add nsw i32 %y.045, 2
>> +  %exitcond.1 = icmp eq i32 %inc.1, 128
>> +  br i1 %exitcond.1, label %for.cond.cleanup, label %for.body
>> +}
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -18,23 +18,16 @@ define void @mainTest(i32* %ptr) #0  {
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
>> -; CHECK-NEXT:    [[TMP9:%.*]] = add i32 1, undef
>> -; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP7]]
>> -; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], undef
>> -; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP6]]
>> -; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], undef
>> -; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
>> -; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP5]]
>> +; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> -; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
>> +; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> +; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1
>>  ; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
>>  ; CHECK-NEXT:    [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
>>  ; CHECK-NEXT:    [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
>> -; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP15]], undef
>>  ; CHECK-NEXT:    br label [[LOOP]]
>>  ; CHECK:       bail_out:
>>  ; CHECK-NEXT:    ret void
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -20,10 +20,6 @@ define void @test() #0 {
>>  ; CHECK-NEXT:    [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64
>> 1>, [[TMP5]]
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32,
>> i64 32, i64 32, i64 32>
>> -; CHECK-NEXT:    [[SUM1:%.*]] = add i64 undef, undef
>> -; CHECK-NEXT:    [[SUM2:%.*]] = add i64 [[SUM1]], undef
>> -; CHECK-NEXT:    [[ZSUM:%.*]] = add i64 [[SUM2]], 0
>> -; CHECK-NEXT:    [[JOIN:%.*]] = add i64 [[TMP6]], [[ZSUM]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4
>> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64>
>> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -31,7 +27,6 @@ define void @test() #0 {
>>  ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
>>  ; CHECK-NEXT:    [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
>> -; CHECK-NEXT:    [[LAST:%.*]] = add i64 [[JOIN]], undef
>>  ; CHECK-NEXT:    br label [[LOOP]]
>>  ;
>>  entry:
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -11,40 +11,6 @@ define void @Test(i32) {
>>  ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2
>> x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
>> i32 1>
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]],
>> i32 1
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32
>> 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
>> -; CHECK-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
>> -; CHECK-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
>> -; CHECK-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
>> -; CHECK-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
>> -; CHECK-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
>> -; CHECK-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
>> -; CHECK-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
>> -; CHECK-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -52,7 +18,7 @@ define void @Test(i32) {
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
>> +; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]]
>>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
>>  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
>>  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
>> @@ -79,7 +45,6 @@ define void @Test(i32) {
>>  ; CHECK-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
>>  ; CHECK-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
>>  ; CHECK-NEXT:    [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
>> -; CHECK-NEXT:    [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> undef, i32
>> [[OP_EXTRA30]], i32 0
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32
>> 14910, i32 1
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32
>> [[TMP2]], i32 0
>> @@ -101,40 +66,8 @@ define void @Test(i32) {
>>  ; FORCE_REDUCTION-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32>
>> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
>>  ; FORCE_REDUCTION-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32>
>> [[SHUFFLE]], i32 1
>>  ; FORCE_REDUCTION-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]],
>> <i32 0, i32 55, i32 285, i32 1240>
>> -; FORCE_REDUCTION-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
>> -; FORCE_REDUCTION-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]],
>> [[TMP0:%.*]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
>> -; FORCE_REDUCTION-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
>> -; FORCE_REDUCTION-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
>>  ; FORCE_REDUCTION-NEXT:    [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
>> -; FORCE_REDUCTION-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]],
>> [[VAL_20]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
>>  ; FORCE_REDUCTION-NEXT:    [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
>> -; FORCE_REDUCTION-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]],
>> [[VAL_34]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
>>  ; FORCE_REDUCTION-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32>
>> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; FORCE_REDUCTION-NEXT:    [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; FORCE_REDUCTION-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -142,7 +75,7 @@ define void @Test(i32) {
>>  ; FORCE_REDUCTION-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32>
>> [[BIN_RDX2]], i32 0
>>  ; FORCE_REDUCTION-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
>>  ; FORCE_REDUCTION-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
>> -; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
>> +; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP6]],
>> [[TMP0:%.*]]
>>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]],
>> [[TMP0]]
>>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]],
>> [[TMP0]]
>>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]],
>> [[TMP0]]
>> @@ -170,7 +103,6 @@ define void @Test(i32) {
>>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]],
>> [[TMP0]]
>>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]],
>> [[TMP0]]
>>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]],
>> [[TMP2]]
>> -; FORCE_REDUCTION-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
>>  ; FORCE_REDUCTION-NEXT:    [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
>>  ; FORCE_REDUCTION-NEXT:    [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]],
>> [[VAL_39]]
>>  ; FORCE_REDUCTION-NEXT:    [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -13,21 +13,6 @@ define void @mainTest(i32 %param, i32 *
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]],
>> i32 15
>>  ; CHECK-NEXT:    store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered,
>> align 4
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 15, i32
>> 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32
>> 4, i32 3, i32 2, i32 1, i32 -1>
>> -; CHECK-NEXT:    [[V14:%.*]] = and i32 [[TMP2]], undef
>> -; CHECK-NEXT:    [[V16:%.*]] = and i32 undef, [[V14]]
>> -; CHECK-NEXT:    [[V18:%.*]] = and i32 undef, [[V16]]
>> -; CHECK-NEXT:    [[V20:%.*]] = and i32 undef, [[V18]]
>> -; CHECK-NEXT:    [[V22:%.*]] = and i32 undef, [[V20]]
>> -; CHECK-NEXT:    [[V24:%.*]] = and i32 undef, [[V22]]
>> -; CHECK-NEXT:    [[V26:%.*]] = and i32 undef, [[V24]]
>> -; CHECK-NEXT:    [[V28:%.*]] = and i32 undef, [[V26]]
>> -; CHECK-NEXT:    [[V30:%.*]] = and i32 undef, [[V28]]
>> -; CHECK-NEXT:    [[V32:%.*]] = and i32 undef, [[V30]]
>> -; CHECK-NEXT:    [[V34:%.*]] = and i32 undef, [[V32]]
>> -; CHECK-NEXT:    [[V36:%.*]] = and i32 undef, [[V34]]
>> -; CHECK-NEXT:    [[V38:%.*]] = and i32 undef, [[V36]]
>> -; CHECK-NEXT:    [[V40:%.*]] = and i32 undef, [[V38]]
>> -; CHECK-NEXT:    [[V42:%.*]] = and i32 undef, [[V40]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]],
>> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
>> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i32>
>> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -38,7 +23,6 @@ define void @mainTest(i32 %param, i32 *
>>  ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]],
>> [[RDX_SHUF5]]
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
>> -; CHECK-NEXT:    [[V43:%.*]] = and i32 undef, [[V42]]
>>  ; CHECK-NEXT:    [[V44:%.*]] = add i32 [[TMP2]], 16
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> undef, i32
>> [[V44]], i32 0
>>  ; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32
>> [[OP_EXTRA]], i32 1
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll Mon Sep
>> 23 09:25:03 2019
>> @@ -30,12 +30,6 @@ define i64 @load_bswap(%v8i8* %p) {
>>  ; CHECK-NEXT:    [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24
>>  ; CHECK-NEXT:    [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16
>>  ; CHECK-NEXT:    [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8
>> -; CHECK-NEXT:    [[OR01:%.*]] = or i64 undef, undef
>> -; CHECK-NEXT:    [[OR012:%.*]] = or i64 [[OR01]], undef
>> -; CHECK-NEXT:    [[OR0123:%.*]] = or i64 [[OR012]], undef
>> -; CHECK-NEXT:    [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]]
>> -; CHECK-NEXT:    [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]]
>> -; CHECK-NEXT:    [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4
>> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64>
>> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -45,7 +39,6 @@ define i64 @load_bswap(%v8i8* %p) {
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]]
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]]
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]]
>> -; CHECK-NEXT:    [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]]
>>  ; CHECK-NEXT:    ret i64 [[OP_EXTRA]]
>>  ;
>>    %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
>> @@ -108,12 +101,6 @@ define i64 @load_bswap_nop_shift(%v8i8*
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align
>> 1
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 56, i64
>> 48, i64 40, i64 32, i64 24, i64 16, i64 8, i64 0>
>> -; CHECK-NEXT:    [[OR01:%.*]] = or i64 undef, undef
>> -; CHECK-NEXT:    [[OR012:%.*]] = or i64 [[OR01]], undef
>> -; CHECK-NEXT:    [[OR0123:%.*]] = or i64 [[OR012]], undef
>> -; CHECK-NEXT:    [[OR01234:%.*]] = or i64 [[OR0123]], undef
>> -; CHECK-NEXT:    [[OR012345:%.*]] = or i64 [[OR01234]], undef
>> -; CHECK-NEXT:    [[OR0123456:%.*]] = or i64 [[OR012345]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8
>> x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i64>
>> [[BIN_RDX]], <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -121,7 +108,6 @@ define i64 @load_bswap_nop_shift(%v8i8*
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i64>
>> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[OR01234567:%.*]] = or i64 [[OR0123456]], undef
>>  ; CHECK-NEXT:    ret i64 [[TMP5]]
>>  ;
>>    %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
>> @@ -196,12 +182,6 @@ define i64 @load64le(i8* %arg) {
>>  ; CHECK-NEXT:    [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40
>>  ; CHECK-NEXT:    [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48
>>  ; CHECK-NEXT:    [[S7:%.*]] = shl nuw i64 [[Z7]], 56
>> -; CHECK-NEXT:    [[O1:%.*]] = or i64 undef, [[Z0]]
>> -; CHECK-NEXT:    [[O2:%.*]] = or i64 [[O1]], undef
>> -; CHECK-NEXT:    [[O3:%.*]] = or i64 [[O2]], undef
>> -; CHECK-NEXT:    [[O4:%.*]] = or i64 [[O3]], undef
>> -; CHECK-NEXT:    [[O5:%.*]] = or i64 [[O4]], [[S5]]
>> -; CHECK-NEXT:    [[O6:%.*]] = or i64 [[O5]], [[S6]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4
>> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64>
>> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -211,7 +191,6 @@ define i64 @load64le(i8* %arg) {
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]]
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]]
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]]
>> -; CHECK-NEXT:    [[O7:%.*]] = or i64 [[O6]], [[S7]]
>>  ; CHECK-NEXT:    ret i64 [[OP_EXTRA]]
>>  ;
>>    %g1 = getelementptr inbounds i8, i8* %arg, i64 1
>> @@ -272,12 +251,6 @@ define i64 @load64le_nop_shift(i8* %arg)
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align
>> 1
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 0, i64
>> 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56>
>> -; CHECK-NEXT:    [[O1:%.*]] = or i64 undef, undef
>> -; CHECK-NEXT:    [[O2:%.*]] = or i64 [[O1]], undef
>> -; CHECK-NEXT:    [[O3:%.*]] = or i64 [[O2]], undef
>> -; CHECK-NEXT:    [[O4:%.*]] = or i64 [[O3]], undef
>> -; CHECK-NEXT:    [[O5:%.*]] = or i64 [[O4]], undef
>> -; CHECK-NEXT:    [[O6:%.*]] = or i64 [[O5]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8
>> x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i64>
>> [[BIN_RDX]], <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -285,7 +258,6 @@ define i64 @load64le_nop_shift(i8* %arg)
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i64>
>> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[O7:%.*]] = or i64 [[O6]], undef
>>  ; CHECK-NEXT:    ret i64 [[TMP5]]
>>  ;
>>    %g1 = getelementptr inbounds i8, i8* %arg, i64 1
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -100,16 +100,8 @@ define float @bazz() {
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast
>> ([20 x float]* @arr to <8 x float>*), align 16
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast
>> ([20 x float]* @arr1 to <8 x float>*), align 16
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
>> -; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>>  ; CHECK-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
>>  ; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
>> -; CHECK-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
>> -; CHECK-NEXT:    [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
>> -; CHECK-NEXT:    [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
>> -; CHECK-NEXT:    [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -119,7 +111,6 @@ define float @bazz() {
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
>>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV6]]
>> -; CHECK-NEXT:    [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
>>  ; CHECK-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
>>  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
>>  ;
>> @@ -131,16 +122,8 @@ define float @bazz() {
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>*
>> bitcast ([20 x float]* @arr to <8 x float>*), align 16
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>*
>> bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
>>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]],
>> [[TMP1]]
>> -; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>>  ; THRESHOLD-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
>>  ; THRESHOLD-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
>> -; THRESHOLD-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
>> -; THRESHOLD-NEXT:    [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
>> -; THRESHOLD-NEXT:    [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
>> -; THRESHOLD-NEXT:    [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float>
>> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -150,7 +133,6 @@ define float @bazz() {
>>  ; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <8 x float>
>> [[BIN_RDX4]], i32 0
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]],
>> [[CONV]]
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV6]]
>> -; THRESHOLD-NEXT:    [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
>>  ; THRESHOLD-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
>>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
>>  ;
>> @@ -205,17 +187,14 @@ define float @bazzz() {
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr to <4 x float>*), align 16
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr1 to <4 x float>*), align 16
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
>> -; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> -; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
>> -; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
>> -; CHECK-NEXT:    store float [[TMP8]], float* @res, align 4
>> -; CHECK-NEXT:    ret float [[TMP8]]
>> +; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> +; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
>> +; CHECK-NEXT:    store float [[TMP5]], float* @res, align 4
>> +; CHECK-NEXT:    ret float [[TMP5]]
>>  ;
>>  ; THRESHOLD-LABEL: @bazzz(
>>  ; THRESHOLD-NEXT:  entry:
>> @@ -224,17 +203,14 @@ define float @bazzz() {
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]],
>> [[TMP1]]
>> -; THRESHOLD-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
>> -; THRESHOLD-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float>
>> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float>
>> [[BIN_RDX]], [[RDX_SHUF1]]
>> -; THRESHOLD-NEXT:    [[TMP6:%.*]] = extractelement <4 x float>
>> [[BIN_RDX2]], i32 0
>> -; THRESHOLD-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
>> -; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
>> -; THRESHOLD-NEXT:    store float [[TMP8]], float* @res, align 4
>> -; THRESHOLD-NEXT:    ret float [[TMP8]]
>> +; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <4 x float>
>> [[BIN_RDX2]], i32 0
>> +; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
>> +; THRESHOLD-NEXT:    store float [[TMP5]], float* @res, align 4
>> +; THRESHOLD-NEXT:    ret float [[TMP5]]
>>  ;
>>  entry:
>>    %0 = load i32, i32* @n, align 4
>> @@ -267,16 +243,13 @@ define i32 @foo() {
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr to <4 x float>*), align 16
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr1 to <4 x float>*), align 16
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
>> -; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> -; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
>> -; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
>> -; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
>> +; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> +; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
>> +; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
>>  ; CHECK-NEXT:    store i32 [[CONV4]], i32* @n, align 4
>>  ; CHECK-NEXT:    ret i32 [[CONV4]]
>>  ;
>> @@ -287,16 +260,13 @@ define i32 @foo() {
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]],
>> [[TMP1]]
>> -; THRESHOLD-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
>> -; THRESHOLD-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float>
>> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float>
>> [[BIN_RDX]], [[RDX_SHUF1]]
>> -; THRESHOLD-NEXT:    [[TMP6:%.*]] = extractelement <4 x float>
>> [[BIN_RDX2]], i32 0
>> -; THRESHOLD-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
>> -; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
>> -; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
>> +; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <4 x float>
>> [[BIN_RDX2]], i32 0
>> +; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
>> +; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
>>  ; THRESHOLD-NEXT:    store i32 [[CONV4]], i32* @n, align 4
>>  ; THRESHOLD-NEXT:    ret i32 [[CONV4]]
>>  ;
>> @@ -330,11 +300,6 @@ define float @bar() {
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr to <4 x float>*), align 16
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr1 to <4 x float>*), align 16
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
>> -; CHECK-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float undef, undef
>> -; CHECK-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef,
>> float undef
>> -; CHECK-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]],
>> undef
>> -; CHECK-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float
>> [[MAX_0_MUL3]], float undef
>> -; CHECK-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]],
>> undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float>
>> [[TMP2]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
>> @@ -342,7 +307,6 @@ define float @bar() {
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float>
>> [[RDX_SHUF1]]
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; CHECK-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float
>> [[MAX_0_MUL3_1]], float undef
>>  ; CHECK-NEXT:    store float [[TMP3]], float* @res, align 4
>>  ; CHECK-NEXT:    ret float [[TMP3]]
>>  ;
>> @@ -351,11 +315,6 @@ define float @bar() {
>>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]],
>> [[TMP0]]
>> -; THRESHOLD-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float undef, undef
>> -; THRESHOLD-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float
>> undef, float undef
>> -; THRESHOLD-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float
>> [[MAX_0_MUL3]], undef
>> -; THRESHOLD-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float
>> [[MAX_0_MUL3]], float undef
>> -; THRESHOLD-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float
>> [[MAX_0_MUL3_1]], undef
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float>
>> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float>
>> [[TMP2]], [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
>> @@ -363,7 +322,6 @@ define float @bar() {
>>  ; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>>  ; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float>
>> [[RDX_SHUF1]]
>>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = extractelement <4 x float>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; THRESHOLD-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float
>> [[MAX_0_MUL3_1]], float undef
>>  ; THRESHOLD-NEXT:    store float [[TMP3]], float* @res, align 4
>>  ; THRESHOLD-NEXT:    ret float [[TMP3]]
>>  ;
>> @@ -410,21 +368,6 @@ define float @f(float* nocapture readonl
>>  ; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 15
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>*
>> [[TMP0]], align 4
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> -; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>>  ; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 16
>>  ; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 17
>>  ; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 18
>> @@ -459,37 +402,6 @@ define float @f(float* nocapture readonl
>>  ; CHECK-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 47
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x
>> float>*
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>*
>> [[TMP2]], align 4
>> -; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> -; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> -; CHECK-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>> -; CHECK-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>> -; CHECK-NEXT:    [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
>> -; CHECK-NEXT:    [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
>> -; CHECK-NEXT:    [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
>> -; CHECK-NEXT:    [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
>> -; CHECK-NEXT:    [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
>> -; CHECK-NEXT:    [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
>> -; CHECK-NEXT:    [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
>> -; CHECK-NEXT:    [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
>> -; CHECK-NEXT:    [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
>> -; CHECK-NEXT:    [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
>> -; CHECK-NEXT:    [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
>> -; CHECK-NEXT:    [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
>> -; CHECK-NEXT:    [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
>> -; CHECK-NEXT:    [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
>> -; CHECK-NEXT:    [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]],
>> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
>> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -511,7 +423,6 @@ define float @f(float* nocapture readonl
>>  ; CHECK-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float>
>> [[BIN_RDX14]], [[RDX_SHUF15]]
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float>
>> [[BIN_RDX16]], i32 0
>>  ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
>> -; CHECK-NEXT:    [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
>>  ; CHECK-NEXT:    ret float [[OP_RDX]]
>>  ;
>>  ; THRESHOLD-LABEL: @f(
>> @@ -533,21 +444,6 @@ define float @f(float* nocapture readonl
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 15
>>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, undef
>> -; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> -; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 16
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 17
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 18
>> @@ -582,37 +478,6 @@ define float @f(float* nocapture readonl
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 47
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to
>> <32 x float>*
>>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>*
>> [[TMP2]], align 4
>> -; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> -; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> -; THRESHOLD-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>> -; THRESHOLD-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>> -; THRESHOLD-NEXT:    [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
>> -; THRESHOLD-NEXT:    [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
>> -; THRESHOLD-NEXT:    [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
>> -; THRESHOLD-NEXT:    [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
>> -; THRESHOLD-NEXT:    [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
>> -; THRESHOLD-NEXT:    [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
>> -; THRESHOLD-NEXT:    [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
>> -; THRESHOLD-NEXT:    [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
>> -; THRESHOLD-NEXT:    [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
>> -; THRESHOLD-NEXT:    [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
>> -; THRESHOLD-NEXT:    [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
>> -; THRESHOLD-NEXT:    [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
>> -; THRESHOLD-NEXT:    [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
>> -; THRESHOLD-NEXT:    [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
>> -; THRESHOLD-NEXT:    [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float>
>> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19,
>> i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32
>> 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
>> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -634,7 +499,6 @@ define float @f(float* nocapture readonl
>>  ; THRESHOLD-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float>
>> [[BIN_RDX14]], [[RDX_SHUF15]]
>>  ; THRESHOLD-NEXT:    [[TMP5:%.*]] = extractelement <16 x float>
>> [[BIN_RDX16]], i32 0
>>  ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
>> -; THRESHOLD-NEXT:    [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
>>  ; THRESHOLD-NEXT:    ret float [[OP_RDX]]
>>  ;
>>    entry:
>> @@ -821,37 +685,6 @@ define float @f1(float* nocapture readon
>>  ; CHECK-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 31
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>*
>> [[TMP0]], align 4
>> -; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> -; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> -; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> -; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> -; CHECK-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]],
>> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
>> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -864,7 +697,6 @@ define float @f1(float* nocapture readon
>>  ; CHECK-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]],
>> [[RDX_SHUF7]]
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <32 x float>
>> [[BIN_RDX8]], i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
>> -; CHECK-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>>  ; CHECK-NEXT:    ret float [[OP_EXTRA]]
>>  ;
>>  ; THRESHOLD-LABEL: @f1(
>> @@ -904,37 +736,6 @@ define float @f1(float* nocapture readon
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 31
>>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> -; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> -; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> -; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> -; THRESHOLD-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float>
>> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19,
>> i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32
>> 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
>> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -947,7 +748,6 @@ define float @f1(float* nocapture readon
>>  ; THRESHOLD-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float>
>> [[BIN_RDX6]], [[RDX_SHUF7]]
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <32 x float>
>> [[BIN_RDX8]], i32 0
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]],
>> [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA]]
>>  ;
>>    entry:
>> @@ -1058,17 +858,12 @@ define float @loadadd31(float* nocapture
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
>>  ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 2
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align
>> 4
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
>>  ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 3
>>  ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 4
>>  ; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 5
>>  ; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 6
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x
>> float>*
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]],
>> align 4
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>>  ; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 8
>>  ; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 9
>> @@ -1079,14 +874,6 @@ define float @loadadd31(float* nocapture
>>  ; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 14
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x
>> float>*
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
>> align 4
>> -; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>>  ; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 15
>>  ; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 16
>>  ; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 17
>> @@ -1105,21 +892,6 @@ define float @loadadd31(float* nocapture
>>  ; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 30
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x
>> float>*
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>*
>> [[TMP6]], align 4
>> -; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> -; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]],
>> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
>> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
>> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1145,7 +917,6 @@ define float @loadadd31(float* nocapture
>>  ; CHECK-NEXT:    [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
>>  ; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
>>  ; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
>> -; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>>  ; CHECK-NEXT:    ret float [[TMP12]]
>>  ;
>>  ; THRESHOLD-LABEL: @loadadd31(
>> @@ -1154,17 +925,12 @@ define float @loadadd31(float* nocapture
>>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]],
>> align 4
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 2
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]],
>> align 4
>> -; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 3
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 4
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 5
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 6
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4
>> x float>*
>>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>*
>> [[TMP2]], align 4
>> -; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 8
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 9
>> @@ -1175,14 +941,6 @@ define float @loadadd31(float* nocapture
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 14
>>  ; THRESHOLD-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8
>> x float>*
>>  ; THRESHOLD-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>*
>> [[TMP4]], align 4
>> -; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 15
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 16
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 17
>> @@ -1201,21 +959,6 @@ define float @loadadd31(float* nocapture
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 30
>>  ; THRESHOLD-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to
>> <16 x float>*
>>  ; THRESHOLD-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>*
>> [[TMP6]], align 4
>> -; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> -; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float>
>> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32
>> 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
>> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1241,7 +984,6 @@ define float @loadadd31(float* nocapture
>>  ; THRESHOLD-NEXT:    [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]],
>> [[TMP10]]
>>  ; THRESHOLD-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX17]],
>> [[TMP1]]
>>  ; THRESHOLD-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
>> -; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>>  ; THRESHOLD-NEXT:    ret float [[TMP12]]
>>  ;
>>    entry:
>> @@ -1352,14 +1094,6 @@ define float @extra_args(float* nocaptur
>>  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
>> -; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
>> -; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
>> -; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
>> -; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1369,7 +1103,6 @@ define float @extra_args(float* nocaptur
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV]]
>> -; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>>  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
>>  ;
>>  ; THRESHOLD-LABEL: @extra_args(
>> @@ -1386,14 +1119,6 @@ define float @extra_args(float* nocaptur
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
>> -; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
>> -; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
>> -; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float>
>> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1403,7 +1128,6 @@ define float @extra_args(float* nocaptur
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float>
>> [[BIN_RDX4]], i32 0
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
>>  ;
>>    entry:
>> @@ -1452,16 +1176,6 @@ define float @extra_args_same_several_ti
>>  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; CHECK-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
>> -; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
>> -; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
>> -; CHECK-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]],
>> 5.000000e+00
>> -; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
>> -; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
>> -; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1473,7 +1187,6 @@ define float @extra_args_same_several_ti
>>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> 5.000000e+00
>>  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]],
>> 5.000000e+00
>>  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]],
>> [[CONV]]
>> -; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>>  ; CHECK-NEXT:    ret float [[OP_EXTRA7]]
>>  ;
>>  ; THRESHOLD-LABEL: @extra_args_same_several_times(
>> @@ -1490,16 +1203,6 @@ define float @extra_args_same_several_ti
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; THRESHOLD-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]],
>> 5.000000e+00
>> -; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
>> -; THRESHOLD-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]],
>> 5.000000e+00
>> -; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
>> -; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
>> -; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float>
>> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1511,7 +1214,6 @@ define float @extra_args_same_several_ti
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> 5.000000e+00
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]],
>> 5.000000e+00
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]],
>> [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA7]]
>>  ;
>>    entry:
>> @@ -1564,14 +1266,6 @@ define float @extra_args_no_replace(floa
>>  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
>> -; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
>> -; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
>> -; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
>> -; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1581,7 +1275,6 @@ define float @extra_args_no_replace(floa
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV]]
>> -; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>>  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
>>  ;
>>  ; THRESHOLD-LABEL: @extra_args_no_replace(
>> @@ -1600,14 +1293,6 @@ define float @extra_args_no_replace(floa
>>  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
>> -; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
>> -; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
>> -; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float>
>> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1617,7 +1302,6 @@ define float @extra_args_no_replace(floa
>>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float>
>> [[BIN_RDX4]], i32 0
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV]]
>> -; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
>>  ;
>>    entry:
>> @@ -1668,10 +1352,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
>>  ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]],
>> zeroinitializer
>>  ; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
>> -; CHECK-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
>> -; CHECK-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
>> -; CHECK-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
>> -; CHECK-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]],
>> <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -1679,7 +1359,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>>  ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
>>  ; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
>> -; CHECK-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
>>  ; CHECK-NEXT:    ret i32 [[OP_EXTRA3]]
>>  ;
>>  ; THRESHOLD-LABEL: @wobble(
>> @@ -1696,10 +1375,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>>  ; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]],
>> i32 3
>>  ; THRESHOLD-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]],
>> zeroinitializer
>>  ; THRESHOLD-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
>> -; THRESHOLD-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
>> -; THRESHOLD-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
>> -; THRESHOLD-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
>> -; THRESHOLD-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32>
>> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]],
>> [[RDX_SHUF]]
>>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -1707,7 +1382,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>>  ; THRESHOLD-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32>
>> [[BIN_RDX2]], i32 0
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
>>  ; THRESHOLD-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]],
>> [[TMP9]]
>> -; THRESHOLD-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
>>  ; THRESHOLD-NEXT:    ret i32 [[OP_EXTRA3]]
>>  ;
>>    bb:
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -12,19 +12,6 @@
>>  define i32 @maxi8(i32) {
>>  ; CHECK-LABEL: @maxi8(
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr to <8 x i32>*), align 16
>> -; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
>> -; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
>> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
>> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]]
>> @@ -34,9 +21,8 @@ define i32 @maxi8(i32) {
>>  ; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x i32>
>> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
>> [[RDX_SHUF4]]
>> -; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
>> undef
>> -; CHECK-NEXT:    ret i32 [[TMP16]]
>> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> +; CHECK-NEXT:    ret i32 [[TMP3]]
>>  ;
>>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>>    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 1), align 4
>> @@ -66,35 +52,6 @@ define i32 @maxi8(i32) {
>>  define i32 @maxi16(i32) {
>>  ; CHECK-LABEL: @maxi16(
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast
>> ([32 x i32]* @arr to <16 x i32>*), align 16
>> -; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
>> -; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
>> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
>> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>> -; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
>> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
>> -; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
>> -; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
>> -; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
>> -; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
>> -; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
>> -; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]],
>> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
>> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1>
>> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]]
>> @@ -107,9 +64,8 @@ define i32 @maxi16(i32) {
>>  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x i32>
>> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32>
>> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1>
>> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32>
>> [[RDX_SHUF7]]
>> -; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x i32>
>> [[RDX_MINMAX_SELECT9]], i32 0
>> -; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
>> undef
>> -; CHECK-NEXT:    ret i32 [[TMP32]]
>> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i32>
>> [[RDX_MINMAX_SELECT9]], i32 0
>> +; CHECK-NEXT:    ret i32 [[TMP3]]
>>  ;
>>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>>    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 1), align 4
>> @@ -163,67 +119,6 @@ define i32 @maxi16(i32) {
>>  define i32 @maxi32(i32) {
>>  ; CHECK-LABEL: @maxi32(
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast
>> ([32 x i32]* @arr to <32 x i32>*), align 16
>> -; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
>> -; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
>> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
>> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>> -; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
>> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
>> -; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
>> -; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
>> -; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
>> -; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
>> -; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
>> -; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
>> -; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef
>> -; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef
>> -; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef
>> -; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef
>> -; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef
>> -; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef
>> -; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef
>> -; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef
>> -; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef
>> -; CHECK-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef
>> -; CHECK-NEXT:    [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef
>> -; CHECK-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef
>> -; CHECK-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef
>> -; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef
>> -; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef
>> -; CHECK-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]],
>> <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1>
>> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]]
>> @@ -239,9 +134,8 @@ define i32 @maxi32(i32) {
>>  ; CHECK-NEXT:    [[RDX_SHUF10:%.*]] = shufflevector <32 x i32>
>> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32>
>> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1>
>> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32>
>> [[RDX_SHUF10]]
>> -; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <32 x i32>
>> [[RDX_MINMAX_SELECT12]], i32 0
>> -; CHECK-NEXT:    [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32
>> undef
>> -; CHECK-NEXT:    ret i32 [[TMP64]]
>> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <32 x i32>
>> [[RDX_MINMAX_SELECT12]], i32 0
>> +; CHECK-NEXT:    ret i32 [[TMP3]]
>>  ;
>>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>>    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 1), align 4
>> @@ -343,19 +237,6 @@ define i32 @maxi32(i32) {
>>  define float @maxf8(float) {
>>  ; CHECK-LABEL: @maxf8(
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast
>> ([32 x float]* @arr1 to <8 x float>*), align 16
>> -; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
>> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
>> undef
>> -; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
>> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]],
>> float undef
>> -; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
>> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]],
>> float undef
>> -; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
>> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
>> float undef
>> -; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
>> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
>> float undef
>> -; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
>> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
>> float undef
>> -; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float>
>> [[TMP2]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
>> @@ -365,9 +246,8 @@ define float @maxf8(float) {
>>  ; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x float>
>> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float>
>> [[RDX_SHUF4]]
>> -; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x float>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]],
>> float undef
>> -; CHECK-NEXT:    ret float [[TMP16]]
>> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> +; CHECK-NEXT:    ret float [[TMP3]]
>>  ;
>>    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 0), align 16
>>    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 1), align 4
>> @@ -397,35 +277,6 @@ define float @maxf8(float) {
>>  define float @maxf16(float) {
>>  ; CHECK-LABEL: @maxf16(
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast
>> ([32 x float]* @arr1 to <16 x float>*), align 16
>> -; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
>> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
>> undef
>> -; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
>> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]],
>> float undef
>> -; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
>> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]],
>> float undef
>> -; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
>> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
>> float undef
>> -; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
>> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
>> float undef
>> -; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
>> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
>> float undef
>> -; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
>> -; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]],
>> float undef
>> -; CHECK-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
>> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]],
>> float undef
>> -; CHECK-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
>> -; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]],
>> float undef
>> -; CHECK-NEXT:    [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
>> -; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]],
>> float undef
>> -; CHECK-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
>> -; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]],
>> float undef
>> -; CHECK-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
>> -; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]],
>> float undef
>> -; CHECK-NEXT:    [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
>> -; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]],
>> float undef
>> -; CHECK-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
>> -; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]],
>> float undef
>> -; CHECK-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]],
>> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
>> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float>
>> [[TMP2]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1>
>> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
>> @@ -438,9 +289,8 @@ define float @maxf16(float) {
>>  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x float>
>> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> <i32 1, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float>
>> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1>
>> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float>
>> [[RDX_SHUF7]]
>> -; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x float>
>> [[RDX_MINMAX_SELECT9]], i32 0
>> -; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]],
>> float undef
>> -; CHECK-NEXT:    ret float [[TMP32]]
>> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float>
>> [[RDX_MINMAX_SELECT9]], i32 0
>> +; CHECK-NEXT:    ret float [[TMP3]]
>>  ;
>>    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 0), align 16
>>    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 1), align 4
>> @@ -494,67 +344,6 @@ define float @maxf16(float) {
>>  define float @maxf32(float) {
>>  ; CHECK-LABEL: @maxf32(
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast
>> ([32 x float]* @arr1 to <32 x float>*), align 16
>> -; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
>> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
>> undef
>> -; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
>> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]],
>> float undef
>> -; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
>> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]],
>> float undef
>> -; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
>> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
>> float undef
>> -; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
>> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
>> float undef
>> -; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
>> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
>> float undef
>> -; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
>> -; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]],
>> float undef
>> -; CHECK-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
>> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]],
>> float undef
>> -; CHECK-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
>> -; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]],
>> float undef
>> -; CHECK-NEXT:    [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
>> -; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]],
>> float undef
>> -; CHECK-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
>> -; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]],
>> float undef
>> -; CHECK-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
>> -; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]],
>> float undef
>> -; CHECK-NEXT:    [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
>> -; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]],
>> float undef
>> -; CHECK-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
>> -; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]],
>> float undef
>> -; CHECK-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
>> -; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]],
>> float undef
>> -; CHECK-NEXT:    [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef
>> -; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]],
>> float undef
>> -; CHECK-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef
>> -; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]],
>> float undef
>> -; CHECK-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef
>> -; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]],
>> float undef
>> -; CHECK-NEXT:    [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef
>> -; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]],
>> float undef
>> -; CHECK-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef
>> -; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]],
>> float undef
>> -; CHECK-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef
>> -; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]],
>> float undef
>> -; CHECK-NEXT:    [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef
>> -; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]],
>> float undef
>> -; CHECK-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef
>> -; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]],
>> float undef
>> -; CHECK-NEXT:    [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef
>> -; CHECK-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]],
>> float undef
>> -; CHECK-NEXT:    [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef
>> -; CHECK-NEXT:    [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]],
>> float undef
>> -; CHECK-NEXT:    [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef
>> -; CHECK-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]],
>> float undef
>> -; CHECK-NEXT:    [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef
>> -; CHECK-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]],
>> float undef
>> -; CHECK-NEXT:    [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef
>> -; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]],
>> float undef
>> -; CHECK-NEXT:    [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef
>> -; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]],
>> float undef
>> -; CHECK-NEXT:    [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef
>> -; CHECK-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]],
>> float undef
>> -; CHECK-NEXT:    [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]],
>> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float>
>> [[TMP2]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1>
>> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]]
>> @@ -570,9 +359,8 @@ define float @maxf32(float) {
>>  ; CHECK-NEXT:    [[RDX_SHUF10:%.*]] = shufflevector <32 x float>
>> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> <i32 1, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float>
>> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1>
>> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float>
>> [[RDX_SHUF10]]
>> -; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <32 x float>
>> [[RDX_MINMAX_SELECT12]], i32 0
>> -; CHECK-NEXT:    [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]],
>> float undef
>> -; CHECK-NEXT:    ret float [[TMP64]]
>> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <32 x float>
>> [[RDX_MINMAX_SELECT12]], i32 0
>> +; CHECK-NEXT:    ret float [[TMP3]]
>>  ;
>>    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 0), align 16
>>    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 1), align 4
>> @@ -678,34 +466,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>>  ; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>>  ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>>  ; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
>> -; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; SSE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> +; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>>  ; SSE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
>> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; SSE-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>>  ; SSE-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>>  ; SSE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>>  ; SSE-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>>  ; SSE-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; SSE-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; SSE-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
>> -; SSE-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
>> [[TMP15]]
>> -; SSE-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
>> -; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]],
>> i32 [[TMP5]]
>> -; SSE-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; SSE-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; SSE-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
>> -; SSE-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]],
>> i32 [[TMP22]]
>> -; SSE-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> -; SSE-NEXT:    store i32 [[TMP25]], i32* @var, align 8
>> -; SSE-NEXT:    ret i32 [[TMP24]]
>> +; SSE-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
>> +; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> [[TMP7]]
>> +; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
>> +; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]],
>> i32 [[TMP5]]
>> +; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> +; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
>> +; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]],
>> i32 [[TMP12]]
>> +; SSE-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> +; SSE-NEXT:    store i32 [[TMP15]], i32* @var, align 8
>> +; SSE-NEXT:    ret i32 [[TMP14]]
>>  ;
>>  ; AVX-LABEL: @maxi8_mutiple_uses(
>>  ; AVX-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
>> @@ -713,34 +491,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>>  ; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>>  ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>>  ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; AVX-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; AVX-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
>> -; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; AVX-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; AVX-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; AVX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> +; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>>  ; AVX-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
>> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; AVX-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>>  ; AVX-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>>  ; AVX-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>>  ; AVX-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>>  ; AVX-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; AVX-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; AVX-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
>> -; AVX-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
>> [[TMP15]]
>> -; AVX-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
>> -; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]],
>> i32 [[TMP5]]
>> -; AVX-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; AVX-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; AVX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
>> -; AVX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]],
>> i32 [[TMP22]]
>> -; AVX-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> -; AVX-NEXT:    store i32 [[TMP25]], i32* @var, align 8
>> -; AVX-NEXT:    ret i32 [[TMP24]]
>> +; AVX-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
>> +; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> [[TMP7]]
>> +; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
>> +; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]],
>> i32 [[TMP5]]
>> +; AVX-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> +; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
>> +; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]],
>> i32 [[TMP12]]
>> +; AVX-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> +; AVX-NEXT:    store i32 [[TMP15]], i32* @var, align 8
>> +; AVX-NEXT:    ret i32 [[TMP14]]
>>  ;
>>  ; AVX2-LABEL: @maxi8_mutiple_uses(
>>  ; AVX2-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
>> @@ -748,34 +516,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>>  ; AVX2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>>  ; AVX2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>>  ; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; AVX2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; AVX2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32
>> undef
>> -; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; AVX2-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; AVX2-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; AVX2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> +; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>>  ; AVX2-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; AVX2-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>>  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>>  ; AVX2-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>>  ; AVX2-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>>  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; AVX2-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; AVX2-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
>> -; AVX2-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
>> [[TMP15]]
>> -; AVX2-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
>> -; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]],
>> i32 [[TMP5]]
>> -; AVX2-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; AVX2-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; AVX2-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
>> -; AVX2-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]],
>> i32 [[TMP22]]
>> -; AVX2-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> -; AVX2-NEXT:    store i32 [[TMP25]], i32* @var, align 8
>> -; AVX2-NEXT:    ret i32 [[TMP24]]
>> +; AVX2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
>> +; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> [[TMP7]]
>> +; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
>> +; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]],
>> i32 [[TMP5]]
>> +; AVX2-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> +; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
>> +; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]],
>> i32 [[TMP12]]
>> +; AVX2-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> +; AVX2-NEXT:    store i32 [[TMP15]], i32* @var, align 8
>> +; AVX2-NEXT:    ret i32 [[TMP14]]
>>  ;
>>  ; SKX-LABEL: @maxi8_mutiple_uses(
>>  ; SKX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x
>> i32]* @arr to <2 x i32>*), align 16
>> @@ -797,26 +555,16 @@ define i32 @maxi8_mutiple_uses(i32) {
>>  ; SKX-NEXT:    [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]]
>>  ; SKX-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32>
>> [[TMP9]], <2 x i32> [[TMP11]]
>>  ; SKX-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
>> -; SKX-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>> -; SKX-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
>> undef
>> -; SKX-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
>> -; SKX-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
>> undef
>> -; SKX-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
>> -; SKX-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
>> undef
>> -; SKX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
>> -; SKX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> undef
>> -; SKX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP6]]
>> -; SKX-NEXT:    [[TMP24:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
>> -; SKX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP14]]
>> -; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
>> i32 [[TMP14]]
>> -; SKX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> [[TMP6]]
>> -; SKX-NEXT:    [[TMP27:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; SKX-NEXT:    [[TMP28:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP27]]
>> -; SKX-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[OP_EXTRA]],
>> i32 [[TMP27]]
>> -; SKX-NEXT:    [[TMP30:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
>> -; SKX-NEXT:    [[TMP31:%.*]] = select i1 [[TMP30]], i32 3, i32 4
>> -; SKX-NEXT:    store i32 [[TMP31]], i32* @var, align 8
>> -; SKX-NEXT:    ret i32 [[TMP29]]
>> +; SKX-NEXT:    [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
>> +; SKX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
>> +; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]],
>> i32 [[TMP14]]
>> +; SKX-NEXT:    [[TMP17:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> +; SKX-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]]
>> +; SKX-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]],
>> i32 [[TMP17]]
>> +; SKX-NEXT:    [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
>> +; SKX-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4
>> +; SKX-NEXT:    store i32 [[TMP21]], i32* @var, align 8
>> +; SKX-NEXT:    ret i32 [[TMP19]]
>>  ;
>>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>>    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 1), align 4
>> @@ -854,33 +602,21 @@ define i32 @maxi8_wrong_parent(i32) {
>>  ; SSE:       pp:
>>  ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>>  ; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
>> -; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; SSE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> -; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
>> +; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> +; SSE-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>>  ; SSE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
>> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; SSE-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>>  ; SSE-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>>  ; SSE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>>  ; SSE-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>>  ; SSE-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; SSE-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; SSE-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
>> -; SSE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> [[TMP15]]
>> -; SSE-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
>> -; SSE-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> [[TMP18]]
>> -; SSE-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
>> -; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
>> i32 [[TMP5]]
>> -; SSE-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
>> [[TMP18]]
>> +; SSE-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
>> +; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
>> [[TMP7]]
>> +; SSE-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
>> +; SSE-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
>> [[TMP8]]
>> +; SSE-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
>> +; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]],
>> i32 [[TMP5]]
>>  ; SSE-NEXT:    ret i32 [[OP_EXTRA]]
>>  ;
>>  ; AVX-LABEL: @maxi8_wrong_parent(
>> @@ -891,33 +627,21 @@ define i32 @maxi8_wrong_parent(i32) {
>>  ; AVX:       pp:
>>  ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>>  ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; AVX-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; AVX-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
>> -; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; AVX-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; AVX-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; AVX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> -; AVX-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; AVX-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; AVX-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
>> +; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> +; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>>  ; AVX-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
>> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; AVX-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>>  ; AVX-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>>  ; AVX-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>>  ; AVX-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>>  ; AVX-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; AVX-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; AVX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
>> -; AVX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> [[TMP15]]
>> -; AVX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
>> -; AVX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> [[TMP18]]
>> -; AVX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
>> -; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
>> i32 [[TMP5]]
>> -; AVX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
>> [[TMP18]]
>> +; AVX-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; AVX-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
>> +; AVX-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
>> [[TMP7]]
>> +; AVX-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
>> +; AVX-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
>> [[TMP8]]
>> +; AVX-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
>> +; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]],
>> i32 [[TMP5]]
>>  ; AVX-NEXT:    ret i32 [[OP_EXTRA]]
>>  ;
>>  ; AVX2-LABEL: @maxi8_wrong_parent(
>> @@ -928,33 +652,21 @@ define i32 @maxi8_wrong_parent(i32) {
>>  ; AVX2:       pp:
>>  ; AVX2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>>  ; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; AVX2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; AVX2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32
>> undef
>> -; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; AVX2-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; AVX2-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; AVX2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> -; AVX2-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; AVX2-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; AVX2-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
>> +; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> +; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>>  ; AVX2-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; AVX2-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>>  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>>  ; AVX2-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>>  ; AVX2-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>>  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; AVX2-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; AVX2-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
>> -; AVX2-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> [[TMP15]]
>> -; AVX2-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
>> -; AVX2-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> [[TMP18]]
>> -; AVX2-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
>> -; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
>> i32 [[TMP5]]
>> -; AVX2-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
>> [[TMP18]]
>> +; AVX2-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; AVX2-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
>> +; AVX2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
>> [[TMP7]]
>> +; AVX2-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
>> +; AVX2-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
>> [[TMP8]]
>> +; AVX2-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
>> +; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]],
>> i32 [[TMP5]]
>>  ; AVX2-NEXT:    ret i32 [[OP_EXTRA]]
>>  ;
>>  ; SKX-LABEL: @maxi8_wrong_parent(
>> @@ -985,21 +697,9 @@ define i32 @maxi8_wrong_parent(i32) {
>>  ; SKX-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32
>> [[TMP4]], i32 1
>>  ; SKX-NEXT:    [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32>
>> [[TMP16]], <2 x i32> [[TMP18]]
>>  ; SKX-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
>> -; SKX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
>> -; SKX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> undef
>> -; SKX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
>> -; SKX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> undef
>> -; SKX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
>> -; SKX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
>> undef
>> -; SKX-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
>> -; SKX-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
>> undef
>> -; SKX-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP7]]
>> -; SKX-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
>> [[TMP7]]
>> -; SKX-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], [[TMP8]]
>> -; SKX-NEXT:    [[TMP32:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
>> -; SKX-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP20]]
>> -; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP33]], i32 [[TMP32]],
>> i32 [[TMP20]]
>> -; SKX-NEXT:    [[TMP34:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
>> [[TMP8]]
>> +; SKX-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
>> +; SKX-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
>> +; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]],
>> i32 [[TMP20]]
>>  ; SKX-NEXT:    ret i32 [[OP_EXTRA]]
>>  ;
>>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -37,14 +37,11 @@ define i32 @add_red(float* %A, i32 %n) {
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x
>> float>*
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float
>> 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
>> -; CHECK-NEXT:    [[ADD6:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT:    [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT:    [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
>>  ; CHECK-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
>>  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
>>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
>> @@ -77,14 +74,11 @@ define i32 @add_red(float* %A, i32 %n) {
>>  ; STORE-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x
>> float>*
>>  ; STORE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]],
>> align 4
>>  ; STORE-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float
>> 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
>> -; STORE-NEXT:    [[ADD6:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT:    [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; STORE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT:    [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
>>  ; STORE-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
>>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
>>  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
>> @@ -178,14 +172,11 @@ define i32 @mul_red(float* noalias %A, f
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
>> -; CHECK-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>>  ; CHECK-NEXT:    [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
>>  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_040]], 1
>>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>> @@ -223,14 +214,11 @@ define i32 @mul_red(float* noalias %A, f
>>  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>>  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>>  ; STORE-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
>> -; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>>  ; STORE-NEXT:    [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
>>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_040]], 1
>>  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>> @@ -350,13 +338,6 @@ define i32 @long_red(float* noalias %A,
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x
>> float>*
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
>> -; CHECK-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>> -; CHECK-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>> -; CHECK-NEXT:    [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
>> -; CHECK-NEXT:    [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
>> -; CHECK-NEXT:    [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
>> -; CHECK-NEXT:    [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
>>  ; CHECK-NEXT:    [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
>>  ; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds float,
>> float* [[A]], i64 [[ADD47]]
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align
>> 4
>> @@ -369,7 +350,6 @@ define i32 @long_red(float* noalias %A,
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
>> -; CHECK-NEXT:    [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
>>  ; CHECK-NEXT:    [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
>>  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_083]], 1
>>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
>> @@ -421,13 +401,6 @@ define i32 @long_red(float* noalias %A,
>>  ; STORE-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x
>> float>*
>>  ; STORE-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
>> align 4
>>  ; STORE-NEXT:    [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
>> -; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>> -; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>> -; STORE-NEXT:    [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
>> -; STORE-NEXT:    [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
>> -; STORE-NEXT:    [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
>> -; STORE-NEXT:    [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
>>  ; STORE-NEXT:    [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
>>  ; STORE-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds float,
>> float* [[A]], i64 [[ADD47]]
>>  ; STORE-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align
>> 4
>> @@ -440,7 +413,6 @@ define i32 @long_red(float* noalias %A,
>>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; STORE-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>>  ; STORE-NEXT:    [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
>> -; STORE-NEXT:    [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
>>  ; STORE-NEXT:    [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
>>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_083]], 1
>>  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
>> @@ -576,16 +548,12 @@ define i32 @chain_red(float* noalias %A,
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
>> -; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
>> -; CHECK-NEXT:    [[ADD9:%.*]] = fadd fast float [[ADD]], undef
>> -; CHECK-NEXT:    [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
>> -; CHECK-NEXT:    [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
>>  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_043]], 1
>>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>>  ; CHECK-NEXT:    br i1 [[EXITCOND]], label
>> [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
>> @@ -622,16 +590,12 @@ define i32 @chain_red(float* noalias %A,
>>  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>>  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>>  ; STORE-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
>> -; STORE-NEXT:    [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
>> -; STORE-NEXT:    [[ADD9:%.*]] = fadd fast float [[ADD]], undef
>> -; STORE-NEXT:    [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>>  ; STORE-NEXT:    [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
>> -; STORE-NEXT:    [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
>>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_043]], 1
>>  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>>  ; STORE-NEXT:    br i1 [[EXITCOND]], label
>> [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
>> @@ -1087,14 +1051,11 @@ define i32 @store_red(float* noalias %A,
>>  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>>  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>>  ; STORE-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]]
>> -; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>>  ; STORE-NEXT:    store float [[TMP6]], float* [[C_ADDR_038]], align 4
>>  ; STORE-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float*
>> [[C_ADDR_038]], i64 1
>>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_039]], 1
>> @@ -1169,14 +1130,11 @@ define void @float_red_example4(float* %
>>  ; STORE-LABEL: @float_red_example4(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([32 x float]* @arr_float to <4 x float>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>>  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
>>  ; STORE-NEXT:    ret void
>>  ;
>> @@ -1216,12 +1174,6 @@ define void @float_red_example8(float* %
>>  ; STORE-LABEL: @float_red_example8(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast
>> ([32 x float]* @arr_float to <8 x float>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; STORE-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; STORE-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; STORE-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP0]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1229,7 +1181,6 @@ define void @float_red_example8(float* %
>>  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>> -; STORE-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>>  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
>>  ; STORE-NEXT:    ret void
>>  ;
>> @@ -1293,20 +1244,6 @@ define void @float_red_example16(float*
>>  ; STORE-LABEL: @float_red_example16(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast
>> ([32 x float]* @arr_float to <16 x float>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; STORE-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; STORE-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; STORE-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; STORE-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; STORE-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; STORE-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; STORE-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; STORE-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; STORE-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; STORE-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; STORE-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP0]],
>> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
>> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
>> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1316,7 +1253,6 @@ define void @float_red_example16(float*
>>  ; STORE-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x float>
>> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]],
>> [[RDX_SHUF5]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <16 x float>
>> [[BIN_RDX6]], i32 0
>> -; STORE-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>>  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
>>  ; STORE-NEXT:    ret void
>>  ;
>> @@ -1372,14 +1308,11 @@ define void @i32_red_example4(i32* %res)
>>  ; STORE-LABEL: @i32_red_example4(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <4 x i32>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP0]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>>  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>>  ; STORE-NEXT:    ret void
>>  ;
>> @@ -1419,12 +1352,6 @@ define void @i32_red_example8(i32* %res)
>>  ; STORE-LABEL: @i32_red_example8(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1432,7 +1359,6 @@ define void @i32_red_example8(i32* %res)
>>  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>>  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>>  ; STORE-NEXT:    ret void
>>  ;
>> @@ -1496,20 +1422,6 @@ define void @i32_red_example16(i32* %res
>>  ; STORE-LABEL: @i32_red_example16(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast
>> ([32 x i32]* @arr_i32 to <16 x i32>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> -; STORE-NEXT:    [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
>> -; STORE-NEXT:    [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
>> -; STORE-NEXT:    [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
>> -; STORE-NEXT:    [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
>> -; STORE-NEXT:    [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
>> -; STORE-NEXT:    [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
>> -; STORE-NEXT:    [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP0]],
>> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
>> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i32>
>> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1519,7 +1431,6 @@ define void @i32_red_example16(i32* %res
>>  ; STORE-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x i32>
>> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]],
>> [[RDX_SHUF5]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[BIN_RDX6]],
>> i32 0
>> -; STORE-NEXT:    [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
>>  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>>  ; STORE-NEXT:    ret void
>>  ;
>> @@ -1631,36 +1542,6 @@ define void @i32_red_example32(i32* %res
>>  ; STORE-LABEL: @i32_red_example32(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast
>> ([32 x i32]* @arr_i32 to <32 x i32>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> -; STORE-NEXT:    [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
>> -; STORE-NEXT:    [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
>> -; STORE-NEXT:    [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
>> -; STORE-NEXT:    [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
>> -; STORE-NEXT:    [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
>> -; STORE-NEXT:    [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
>> -; STORE-NEXT:    [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
>> -; STORE-NEXT:    [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
>> -; STORE-NEXT:    [[ADD_15:%.*]] = add nsw i32 undef, [[ADD_14]]
>> -; STORE-NEXT:    [[ADD_16:%.*]] = add nsw i32 undef, [[ADD_15]]
>> -; STORE-NEXT:    [[ADD_17:%.*]] = add nsw i32 undef, [[ADD_16]]
>> -; STORE-NEXT:    [[ADD_18:%.*]] = add nsw i32 undef, [[ADD_17]]
>> -; STORE-NEXT:    [[ADD_19:%.*]] = add nsw i32 undef, [[ADD_18]]
>> -; STORE-NEXT:    [[ADD_20:%.*]] = add nsw i32 undef, [[ADD_19]]
>> -; STORE-NEXT:    [[ADD_21:%.*]] = add nsw i32 undef, [[ADD_20]]
>> -; STORE-NEXT:    [[ADD_22:%.*]] = add nsw i32 undef, [[ADD_21]]
>> -; STORE-NEXT:    [[ADD_23:%.*]] = add nsw i32 undef, [[ADD_22]]
>> -; STORE-NEXT:    [[ADD_24:%.*]] = add nsw i32 undef, [[ADD_23]]
>> -; STORE-NEXT:    [[ADD_25:%.*]] = add nsw i32 undef, [[ADD_24]]
>> -; STORE-NEXT:    [[ADD_26:%.*]] = add nsw i32 undef, [[ADD_25]]
>> -; STORE-NEXT:    [[ADD_27:%.*]] = add nsw i32 undef, [[ADD_26]]
>> -; STORE-NEXT:    [[ADD_28:%.*]] = add nsw i32 undef, [[ADD_27]]
>> -; STORE-NEXT:    [[ADD_29:%.*]] = add nsw i32 undef, [[ADD_28]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP0]],
>> <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <32 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x i32>
>> [[BIN_RDX]], <32 x i32> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1672,7 +1553,6 @@ define void @i32_red_example32(i32* %res
>>  ; STORE-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x i32>
>> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX8:%.*]] = add nsw <32 x i32> [[BIN_RDX6]],
>> [[RDX_SHUF7]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <32 x i32> [[BIN_RDX8]],
>> i32 0
>> -; STORE-NEXT:    [[ADD_30:%.*]] = add nsw i32 undef, [[ADD_29]]
>>  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>>  ; STORE-NEXT:    ret void
>>  ;
>> @@ -1750,12 +1630,6 @@ define void @i32_red_call(i32 %val) {
>>  ; CHECK-LABEL: @i32_red_call(
>>  ; CHECK-NEXT:  entry:
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1763,19 +1637,12 @@ define void @i32_red_call(i32 %val) {
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>>  ; CHECK-NEXT:    [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
>>  ; CHECK-NEXT:    ret void
>>  ;
>>  ; STORE-LABEL: @i32_red_call(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1783,7 +1650,6 @@ define void @i32_red_call(i32 %val) {
>>  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>>  ; STORE-NEXT:    [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
>>  ; STORE-NEXT:    ret void
>>  ;
>> @@ -1811,12 +1677,6 @@ define void @i32_red_invoke(i32 %val) pe
>>  ; CHECK-LABEL: @i32_red_invoke(
>>  ; CHECK-NEXT:  entry:
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1824,7 +1684,6 @@ define void @i32_red_invoke(i32 %val) pe
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>>  ; CHECK-NEXT:    [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
>>  ; CHECK-NEXT:    to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
>>  ; CHECK:       exception:
>> @@ -1837,12 +1696,6 @@ define void @i32_red_invoke(i32 %val) pe
>>  ; STORE-LABEL: @i32_red_invoke(
>>  ; STORE-NEXT:  entry:
>>  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1850,7 +1703,6 @@ define void @i32_red_invoke(i32 %val) pe
>>  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>>  ; STORE-NEXT:    [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
>>  ; STORE-NEXT:    to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
>>  ; STORE:       exception:
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll Mon Sep
>> 23 09:25:03 2019
>> @@ -12,10 +12,10 @@ define i32 @test(double* nocapture %A, i
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align
>> 1
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 3, i8 3>
>> -; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
>> -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i8> undef, i8
>> [[TMP3]], i32 0
>> -; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
>> -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8
>> [[TMP5]], i32 1
>> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
>> +; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
>> +; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i8> undef, i8
>> [[TMP4]], i32 0
>> +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i8> [[TMP5]], i8
>> [[TMP3]], i32 1
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double>
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], <double
>> 1.000000e+00, double 1.000000e+00>
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>> Mon Sep 23 09:25:03 2019
>> @@ -5,36 +5,6 @@ define signext i8 @Foo(<32 x i8>* %__v)
>>  ; CHECK-LABEL: @Foo(
>>  ; CHECK-NEXT:  entry:
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]],
>> align 32
>> -; CHECK-NEXT:    [[ADD_I_1_I:%.*]] = add i8 undef, undef
>> -; CHECK-NEXT:    [[ADD_I_2_I:%.*]] = add i8 [[ADD_I_1_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_3_I:%.*]] = add i8 [[ADD_I_2_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_4_I:%.*]] = add i8 [[ADD_I_3_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_5_I:%.*]] = add i8 [[ADD_I_4_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_6_I:%.*]] = add i8 [[ADD_I_5_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_7_I:%.*]] = add i8 [[ADD_I_6_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_8_I:%.*]] = add i8 [[ADD_I_7_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_9_I:%.*]] = add i8 [[ADD_I_8_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_10_I:%.*]] = add i8 [[ADD_I_9_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_11_I:%.*]] = add i8 [[ADD_I_10_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_12_I:%.*]] = add i8 [[ADD_I_11_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_13_I:%.*]] = add i8 [[ADD_I_12_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_14_I:%.*]] = add i8 [[ADD_I_13_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_15_I:%.*]] = add i8 [[ADD_I_14_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_16_I:%.*]] = add i8 [[ADD_I_15_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_17_I:%.*]] = add i8 [[ADD_I_16_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_18_I:%.*]] = add i8 [[ADD_I_17_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_19_I:%.*]] = add i8 [[ADD_I_18_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_20_I:%.*]] = add i8 [[ADD_I_19_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_21_I:%.*]] = add i8 [[ADD_I_20_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_22_I:%.*]] = add i8 [[ADD_I_21_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_23_I:%.*]] = add i8 [[ADD_I_22_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_24_I:%.*]] = add i8 [[ADD_I_23_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_25_I:%.*]] = add i8 [[ADD_I_24_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_26_I:%.*]] = add i8 [[ADD_I_25_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_27_I:%.*]] = add i8 [[ADD_I_26_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_28_I:%.*]] = add i8 [[ADD_I_27_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_29_I:%.*]] = add i8 [[ADD_I_28_I]], undef
>> -; CHECK-NEXT:    [[ADD_I_30_I:%.*]] = add i8 [[ADD_I_29_I]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i8> [[TMP0]],
>> <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <32 x i8> [[TMP0]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x i8>
>> [[BIN_RDX]], <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32
>> 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -46,7 +16,6 @@ define signext i8 @Foo(<32 x i8>* %__v)
>>  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x i8>
>> [[BIN_RDX6]], <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <32 x i8> [[BIN_RDX6]],
>> [[RDX_SHUF7]]
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <32 x i8> [[BIN_RDX8]],
>> i32 0
>> -; CHECK-NEXT:    [[ADD_I_31_I:%.*]] = add i8 [[ADD_I_30_I]], undef
>>  ; CHECK-NEXT:    ret i8 [[TMP1]]
>>  ;
>>  entry:
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -35,13 +35,6 @@ define i32 @test(i32* nocapture readonly
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], <i32 42, i32 42,
>> i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
>> -; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
>> -; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
>> -; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
>> -; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -50,7 +43,6 @@ define i32 @test(i32* nocapture readonly
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]]
>> -; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>>  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>>  ; CHECK:       for.end:
>>  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
>> @@ -138,13 +130,6 @@ define i32 @test2(i32* nocapture readonl
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
>> -; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
>> -; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
>> -; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
>> -; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -153,7 +138,6 @@ define i32 @test2(i32* nocapture readonl
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
>> -; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>>  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>>  ; CHECK:       for.end:
>>  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
>> @@ -258,13 +242,6 @@ define i32 @test3(i32* nocapture readonl
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
>>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]],
>> [[TMP3]]
>> -; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
>> -; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
>> -; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
>> -; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
>> -; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
>> -; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
>> -; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -273,7 +250,6 @@ define i32 @test3(i32* nocapture readonl
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
>> -; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>>  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>>  ; CHECK:       for.end:
>>  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>> Mon Sep 23 09:25:03 2019
>> @@ -26,12 +26,6 @@ define i32 @test_add(i32* nocapture read
>>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
>> [[P]], i64 7
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT:    [[MUL_18:%.*]] = add i32 undef, undef
>> -; CHECK-NEXT:    [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
>> -; CHECK-NEXT:    [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
>> -; CHECK-NEXT:    [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
>> -; CHECK-NEXT:    [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
>> -; CHECK-NEXT:    [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -39,7 +33,6 @@ define i32 @test_add(i32* nocapture read
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
>>  ; CHECK-NEXT:    ret i32 [[TMP2]]
>>  ;
>>  entry:
>> @@ -147,12 +140,6 @@ define i32 @test_and(i32* nocapture read
>>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
>> [[P]], i64 7
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT:    [[MUL_18:%.*]] = and i32 undef, undef
>> -; CHECK-NEXT:    [[MUL_29:%.*]] = and i32 undef, [[MUL_18]]
>> -; CHECK-NEXT:    [[MUL_310:%.*]] = and i32 undef, [[MUL_29]]
>> -; CHECK-NEXT:    [[MUL_411:%.*]] = and i32 undef, [[MUL_310]]
>> -; CHECK-NEXT:    [[MUL_512:%.*]] = and i32 undef, [[MUL_411]]
>> -; CHECK-NEXT:    [[MUL_613:%.*]] = and i32 undef, [[MUL_512]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -160,7 +147,6 @@ define i32 @test_and(i32* nocapture read
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[MUL_714:%.*]] = and i32 undef, [[MUL_613]]
>>  ; CHECK-NEXT:    ret i32 [[TMP2]]
>>  ;
>>  entry:
>> @@ -208,12 +194,6 @@ define i32 @test_or(i32* nocapture reado
>>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
>> [[P]], i64 7
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT:    [[MUL_18:%.*]] = or i32 undef, undef
>> -; CHECK-NEXT:    [[MUL_29:%.*]] = or i32 undef, [[MUL_18]]
>> -; CHECK-NEXT:    [[MUL_310:%.*]] = or i32 undef, [[MUL_29]]
>> -; CHECK-NEXT:    [[MUL_411:%.*]] = or i32 undef, [[MUL_310]]
>> -; CHECK-NEXT:    [[MUL_512:%.*]] = or i32 undef, [[MUL_411]]
>> -; CHECK-NEXT:    [[MUL_613:%.*]] = or i32 undef, [[MUL_512]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -221,7 +201,6 @@ define i32 @test_or(i32* nocapture reado
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[MUL_714:%.*]] = or i32 undef, [[MUL_613]]
>>  ; CHECK-NEXT:    ret i32 [[TMP2]]
>>  ;
>>  entry:
>> @@ -269,12 +248,6 @@ define i32 @test_xor(i32* nocapture read
>>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
>> [[P]], i64 7
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT:    [[MUL_18:%.*]] = xor i32 undef, undef
>> -; CHECK-NEXT:    [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]]
>> -; CHECK-NEXT:    [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]]
>> -; CHECK-NEXT:    [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]]
>> -; CHECK-NEXT:    [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]]
>> -; CHECK-NEXT:    [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -282,7 +255,6 @@ define i32 @test_xor(i32* nocapture read
>>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT:    [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]]
>>  ; CHECK-NEXT:    ret i32 [[TMP2]]
>>  ;
>>  entry:
>> @@ -322,15 +294,12 @@ define i32 @PR37731(<4 x i32>* noalias n
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i32> [[TMP4]], <i32 18, i32 2,
>> i32 7, i32 13>
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]]
>>  ; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16
>> -; CHECK-NEXT:    [[TMP7:%.*]] = xor i32 undef, undef
>> -; CHECK-NEXT:    [[TMP8:%.*]] = xor i32 [[TMP7]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <4 x i32> [[TMP6]], [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = xor <4 x i32> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> -; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT:    [[TMP10:%.*]] = xor i32 [[TMP8]], undef
>> -; CHECK-NEXT:    ret i32 [[TMP9]]
>> +; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> +; CHECK-NEXT:    ret i32 [[TMP7]]
>>  ;
>>  entry:
>>    %0 = load <4 x i32>, <4 x i32>* %self, align 16
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -33,11 +33,8 @@ define i32 @foo(i32* %diff) #0 {
>>  ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
>>  ; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]],
>> align 4
>>  ; CHECK-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP9]]
>> -; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 undef, [[A_088]]
>>  ; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x
>> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1
>> -; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[ADD10]], undef
>>  ; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x
>> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2
>> -; CHECK-NEXT:    [[ADD38:%.*]] = add nsw i32 [[ADD24]], undef
>>  ; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x
>> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3
>>  ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
>>  ; CHECK-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align
>> 16
>> @@ -47,7 +44,6 @@ define i32 @foo(i32* %diff) #0 {
>>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>>  ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>>  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]]
>> -; CHECK-NEXT:    [[ADD52:%.*]] = add nsw i32 [[ADD38]], undef
>>  ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
>>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
>>  ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label
>> [[FOR_BODY]]
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>> Mon Sep 23 09:25:03 2019
>> @@ -19,11 +19,6 @@ define void @hoge() {
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
>>  ; CHECK-NEXT:    [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2
>> x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
>>  ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]], <i32 undef,
>> i32 15, i32 31, i32 47>
>> -; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32
>> undef
>> -; CHECK-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32
>> undef
>> -; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <4 x i32> [[TMP5]],
>> <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP10:%.*]] = icmp sgt <4 x i32> [[TMP5]],
>> [[RDX_SHUF9]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT11:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP10]], <4 x i32> [[TMP5]], <4 x i32> [[RDX_SHUF9]]
>> @@ -31,28 +26,12 @@ define void @hoge() {
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP13:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT11]], [[RDX_SHUF12]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT14:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP13]], <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32>
>> [[RDX_SHUF12]]
>>  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT14]], i32 0
>> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32
>> undef
>>  ; CHECK-NEXT:    [[TMP19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef
>>  ; CHECK-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], 63
>>  ; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]]
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
>>  ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2
>> x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49,
>> i32 -33, i32 -33, i32 -17>
>> -; CHECK-NEXT:    [[TMP26:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32
>> undef
>> -; CHECK-NEXT:    [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef
>> -; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i32 undef, i32
>> [[TMP27]]
>> -; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 undef, i32
>> undef
>> -; CHECK-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP29]]
>> -; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP29]], i32
>> [[TMP32]]
>> -; CHECK-NEXT:    [[TMP36:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT:    [[TMP37:%.*]] = select i1 [[TMP36]], i32 undef, i32
>> undef
>> -; CHECK-NEXT:    [[TMP38:%.*]] = icmp sgt i32 [[TMP37]], [[TMP34]]
>> -; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP34]], i32
>> [[TMP37]]
>> -; CHECK-NEXT:    [[TMP41:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 undef, i32
>> undef
>> -; CHECK-NEXT:    [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], [[TMP39]]
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP9]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP9]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP9]], <4 x i32> [[RDX_SHUF]]
>> @@ -70,7 +49,6 @@ define void @hoge() {
>>  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = select i1 [[TMP14]], i32
>> [[OP_EXTRA5]], i32 undef
>>  ; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef
>>  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = select i1 [[TMP15]], i32
>> [[OP_EXTRA6]], i32 undef
>> -; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP39]], i32
>> [[TMP42]]
>>  ; CHECK-NEXT:    [[TMP45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA7]]
>>  ; CHECK-NEXT:    unreachable
>>  ;
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -16,15 +16,6 @@ define void @_Z2azv() local_unnamed_addr
>>  ; CHECK-NEXT:    [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds
>> %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76",
>> %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1
>>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x
>> i32>*
>>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT:    [[CMP_I1_4:%.*]] = icmp slt i32 undef, undef
>> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_4:%.*]] = select i1 [[CMP_I1_4]],
>> i32 undef, i32 undef
>> -; CHECK-NEXT:    [[CMP_I1_5:%.*]] = icmp slt i32
>> [[DOTSROA_SPECULATED_4]], undef
>> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_5:%.*]] = select i1 [[CMP_I1_5]],
>> i32 undef, i32 [[DOTSROA_SPECULATED_4]]
>> -; CHECK-NEXT:    [[CMP_I1_6:%.*]] = icmp slt i32
>> [[DOTSROA_SPECULATED_5]], undef
>> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_6:%.*]] = select i1 [[CMP_I1_6]],
>> i32 undef, i32 [[DOTSROA_SPECULATED_5]]
>> -; CHECK-NEXT:    [[CMP_I1_7:%.*]] = icmp slt i32
>> [[DOTSROA_SPECULATED_6]], undef
>> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_7:%.*]] = select i1 [[CMP_I1_7]],
>> i32 undef, i32 [[DOTSROA_SPECULATED_6]]
>> -; CHECK-NEXT:    [[CMP_I1_8:%.*]] = icmp slt i32 undef, undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP1]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP1]], <8 x i32> [[RDX_SHUF]]
>> @@ -39,7 +30,6 @@ define void @_Z2azv() local_unnamed_addr
>>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]],
>> i32 undef
>>  ; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef
>>  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = select i1 [[TMP4]], i32
>> [[OP_EXTRA]], i32 undef
>> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_8:%.*]] = select i1 [[CMP_I1_8]],
>> i32 undef, i32 undef
>>  ; CHECK-NEXT:    [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32
>> undef, i32 [[OP_EXTRA7]]
>>  ; CHECK-NEXT:    [[CMP_I1_10:%.*]] = icmp slt i32
>> [[DOTSROA_SPECULATED_9]], undef
>>  ; CHECK-NEXT:    ret void
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> ---
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>> (original)
>> +++
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -18,19 +18,6 @@ define i32 @foo(i32* nocapture readonly
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
>> [[A7:%.*]], i32 6
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
>> [[A8:%.*]], i32 7
>>  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
>> -; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
>> -; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
>> -; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
>> -; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32
>> undef
>> -; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
>> -; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
>> -; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
>> -; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
>> -; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]],
>> <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
>> @@ -41,7 +28,6 @@ define i32 @foo(i32* nocapture readonly
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
>> [[RDX_SHUF4]]
>>  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]],
>> i32 undef
>>  ; CHECK-NEXT:    ret i32 [[TMP11]]
>>  ;
>>  entry:
>> @@ -92,19 +78,6 @@ define i32 @foo1(i32* nocapture readonly
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
>> [[A7:%.*]], i32 6
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
>> [[A8:%.*]], i32 7
>>  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
>> -; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
>> -; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
>> -; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
>> -; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32
>> undef
>> -; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
>> -; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
>> -; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
>> -; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
>> -; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]],
>> <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
>> @@ -115,7 +88,6 @@ define i32 @foo1(i32* nocapture readonly
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
>> [[RDX_SHUF4]]
>>  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]],
>> i32 undef
>>  ; CHECK-NEXT:    ret i32 [[TMP11]]
>>  ;
>>  entry:
>> @@ -170,19 +142,6 @@ define i32 @foo2(i32* nocapture readonly
>>  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
>> [[A7:%.*]], i32 6
>>  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
>> [[A8:%.*]], i32 7
>>  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
>> -; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
>> -; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
>> -; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
>> -; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32
>> undef
>> -; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
>> -; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
>> -; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
>> -; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
>> -; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]],
>> i32 undef
>> -; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]],
>> <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
>> [[RDX_SHUF]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
>> @@ -193,7 +152,6 @@ define i32 @foo2(i32* nocapture readonly
>>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
>> [[RDX_SHUF4]]
>>  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]],
>> i32 undef
>>  ; CHECK-NEXT:    ret i32 [[TMP11]]
>>  ;
>>  entry:
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190926/49c691a4/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 4849 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190926/49c691a4/attachment-0001.bin>
    
    
More information about the llvm-commits mailing list