[llvm] r372626 - [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L) && "SCEVAddRecExpr operand is not loop-invariant!")

Jordan Rupprecht via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 26 14:55:30 PDT 2019


Looks like this causes some crashes now. On the unreduced C++ source we see
it with -fexperimental-new-pass-manager, but on the reduced case the choice
of pass manager doesn't seem to matter. (Strange...)

Anyway, the repro: $ clang -O3 -c reduced.ll

Where reduced.ll is:

; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
target triple = "x86_64-unknown-linux-gnu"

@k = external dso_local constant [8 x [4 x i32]], align 16
@l = external dso_local global [366 x i32], align 16

define void @n() {
entry:
  %i = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  %c = alloca i32, align 4
  %cb = alloca i32, align 4
  %cw = alloca i32, align 4
  %d = alloca i32, align 4
  %e = alloca i32, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.end17, %entry
  %0 = load i32, i32* %i, align 4
  %add = add nsw i32 %0, -183
  store i32 %add, i32* %a, align 4
  store i32 0, i32* %c, align 4
  store i32 0, i32* %cb, align 4
  br label %for.cond3

for.cond3:                                        ; preds = %for.end, %for.cond
  %1 = load i32, i32* %cb, align 4
  %cmp4 = icmp slt i32 %1, 8
  br i1 %cmp4, label %for.body5, label %for.end17

for.body5:                                        ; preds = %for.cond3
  store i32 0, i32* %cw, align 4
  br label %for.cond6

for.cond6:                                        ; preds = %if.end14, %for.body5
  %2 = load i32, i32* %cw, align 4
  %cmp7 = icmp slt i32 %2, 4
  %3 = load i32, i32* %cb, align 4
  br i1 %cmp7, label %for.body8, label %for.end

for.body8:                                        ; preds = %for.cond6
  %g = sext i32 %3 to i64
  %arrayidx = getelementptr inbounds [8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 %g
  %4 = load i32, i32* %cw, align 4
  %f = sext i32 %4 to i64
  %h = getelementptr inbounds [4 x i32], [4 x i32]* %arrayidx, i64 0, i64 %f
  %5 = load i32, i32* %h, align 4
  store i32 %5, i32* %d, align 4
  %6 = load i32, i32* %a, align 4
  %7 = load i32, i32* %d, align 4
  %sub = sub nsw i32 %6, %7
  %j = call i32 @abs(i32 %sub)
  store i32 %j, i32* %e, align 4
  %8 = load i32, i32* %e, align 4
  %9 = load i32, i32* %b, align 4
  %cmp12 = icmp slt i32 %8, %9
  br i1 %cmp12, label %if.then13, label %if.end14

if.then13:                                        ; preds = %for.body8
  %10 = load i32, i32* %cb, align 4
  store i32 %10, i32* %c, align 4
  %11 = load i32, i32* %e, align 4
  store i32 %11, i32* %b, align 4
  br label %if.end14

if.end14:                                         ; preds = %if.then13, %for.body8
  %12 = load i32, i32* %cw, align 4
  %inc = add nsw i32 %12, 1
  store i32 %inc, i32* %cw, align 4
  br label %for.cond6

for.end:                                          ; preds = %for.cond6
  %inc16 = add nsw i32 %3, 1
  store i32 %inc16, i32* %cb, align 4
  br label %for.cond3

for.end17:                                        ; preds = %for.cond3
  %13 = load i32, i32* %c, align 4
  %14 = load i32, i32* %i, align 4
  %g18 = sext i32 %14 to i64
  %k = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 %g18
  store i32 %13, i32* %k, align 4
  %15 = load i32, i32* %i, align 4
  %inc21 = add nsw i32 %15, 1
  store i32 %inc21, i32* %i, align 4
  br label %for.cond
}

declare i32 @abs(i32)

=>

Stack dump:
0.      Program arguments: /src/llvm-build/dev/bin/clang-10 -cc1 -triple x86_64-unknown-linux-gnu -emit-obj -disable-free -disable-llvm-verifier -discard-value-names -main-file-name reduced.ll -mrelocation-model static -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -coverage-notes-file /tmp/crash/reduced.gcno -resource-dir /src/llvm-build/dev/lib/clang/10.0.0 -O3 -fdebug-compilation-dir /tmp/crash -ferror-limit 19 -fmessage-length 0 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -faddrsig -o reduced.o -x ir reduced.ll
1.      Per-module optimization passes
2.      Running pass 'Function Pass Manager' on module 'reduced.ll'.
3.      Running pass 'Combine redundant instructions' on function '@n'
 #0 0x00007fd9935ae474 llvm::sys::PrintStackTrace(llvm::raw_ostream&) /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:532:13
 #1 0x00007fd9935ae474 PrintStackTraceSignalHandler(void*) /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:592:3
 #2 0x00007fd9935ac34e llvm::sys::RunSignalHandlers() /src/llvm-project/llvm/lib/Support/Signals.cpp:69:18
 #3 0x00007fd9935ae728 SignalHandler(int) /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:384:1
 #4 0x00007fd992fa73a0 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x123a0)
 #5 0x00007fd993a9c4cc llvm::ConstantInt::classof(llvm::Value const*) /src/llvm-project/llvm/include/llvm/IR/Constants.h:256:28
 #6 0x00007fd993a9c4cc llvm::isa_impl<llvm::ConstantInt, llvm::Value, void>::doit(llvm::Value const&) /src/llvm-project/llvm/include/llvm/Support/Casting.h:58:12
 #7 0x00007fd993a9c4cc llvm::isa_impl_cl<llvm::ConstantInt, llvm::Value const*>::doit(llvm::Value const*) /src/llvm-project/llvm/include/llvm/Support/Casting.h:106:12
 #8 0x00007fd993a9c4cc llvm::isa_impl_wrap<llvm::ConstantInt, llvm::Value const*, llvm::Value const*>::doit(llvm::Value const* const&) /src/llvm-project/llvm/include/llvm/Support/Casting.h:132:12
 #9 0x00007fd993a9c4cc llvm::isa_impl_wrap<llvm::ConstantInt, llvm::Value* const, llvm::Value const*>::doit(llvm::Value* const&) /src/llvm-project/llvm/include/llvm/Support/Casting.h:122:12
#10 0x00007fd993a9c4cc bool llvm::isa<llvm::ConstantInt, llvm::Value*>(llvm::Value* const&) /src/llvm-project/llvm/include/llvm/Support/Casting.h:142:10
#11 0x00007fd993a9c4cc llvm::cast_retty<llvm::ConstantInt, llvm::Value*>::ret_type llvm::dyn_cast<llvm::ConstantInt, llvm::Value>(llvm::Value*) /src/llvm-project/llvm/include/llvm/Support/Casting.h:343:10
#12 0x00007fd993a9c4cc llvm::InstCombiner::foldOrOfICmps(llvm::ICmpInst*, llvm::ICmpInst*, llvm::Instruction&) /src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp:2148:23
#13 0x00007fd993aa260e llvm::InstCombiner::visitOr(llvm::BinaryOperator&) /src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp:2592:18
#14 0x00007fd993a7f0d0 llvm::InstCombiner::run() /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3312:22
#15 0x00007fd993a8013f combineInstructionsOverFunction(llvm::Function&, llvm::InstCombineWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::ProfileSummaryInfo*, bool, llvm::LoopInfo*) /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3546:13
#16 0x00007fd993a81077 llvm::InstructionCombiningPass::runOnFunction(llvm::Function&) /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3620:10
#17 0x00007fd993ef1816 llvm::FPPassManager::runOnFunction(llvm::Function&) /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1648:27
#18 0x00007fd993ef1ad3 llvm::FPPassManager::runOnModule(llvm::Module&) /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1685:13
#19 0x00007fd993ef2128 (anonymous namespace)::MPPassManager::runOnModule(llvm::Module&) /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1750:27
#20 0x00007fd993ef2128 llvm::legacy::PassManagerImpl::run(llvm::Module&) /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1863:44
#21 0x00007fd992767ac2 (anonymous namespace)::EmitAssemblyHelper::EmitAssembly(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream> >) /src/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:909:3
#22 0x00007fd992767ac2 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::DataLayout const&, llvm::Module*, clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream> >) /src/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1533:15
#23 0x00007fd9929c4e4d std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream> >::~unique_ptr() /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/unique_ptr.h:273:6
#24 0x00007fd9929c4e4d clang::CodeGenAction::ExecuteAction() /src/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:1080:5
#25 0x00007fd9923ee4a9 clang::FrontendAction::Execute() /src/llvm-project/clang/lib/Frontend/FrontendAction.cpp:939:10
#26 0x00007fd99238de50 llvm::Error::getPtr() const /src/llvm-project/llvm/include/llvm/Support/Error.h:273:42
#27 0x00007fd99238de50 llvm::Error::operator bool() /src/llvm-project/llvm/include/llvm/Support/Error.h:236:16
#28 0x00007fd99238de50 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) /src/llvm-project/clang/lib/Frontend/CompilerInstance.cpp:957:23
#29 0x00007fd9922e952c clang::ExecuteCompilerInvocation(clang::CompilerInstance*) /src/llvm-project/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:290:25
#30 0x0000000000213bfd cc1_main(llvm::ArrayRef<char const*>, char const*, void*) /src/llvm-project/clang/tools/driver/cc1_main.cpp:250:15
#31 0x0000000000211e3f ExecuteCC1Tool(llvm::ArrayRef<char const*>, llvm::StringRef) /src/llvm-project/clang/tools/driver/driver.cpp:309:12
#32 0x0000000000211e3f main /src/llvm-project/clang/tools/driver/driver.cpp:382:12

On Mon, Sep 23, 2019 at 9:23 AM Alexey Bataev via llvm-commits <llvm-commits at lists.llvm.org> wrote:

> Author: abataev
> Date: Mon Sep 23 09:25:03 2019
> New Revision: 372626
>
> URL: http://llvm.org/viewvc/llvm-project?rev=372626&view=rev
> Log:
> [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L)
> && "SCEVAddRecExpr operand is not loop-invariant!")
>
> Summary:
> Initially the SLP vectorizer replaced all to-be-vectorized instructions
> with Undef values. That may break ScalarEvolution and cause a crash.
> Reworked the SLP vectorizer so that it no longer replaces vectorized
> instructions with UndefValue. Instead, vectorized instructions are
> marked for deletion inside the BoUpSLP class and deleted upon its
> destruction.
>
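[For readers skimming the patch: below is a minimal, self-contained sketch of the deferred-deletion pattern the summary describes, condensed from the BoUpSLP changes in the diff further down. The DeferredDeleter class name is illustrative only; in the patch these members live directly in BoUpSLP.]

// Illustrative condensation of the patch's approach (not the exact LLVM code).
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"
#include <cassert>

class DeferredDeleter {
  // Scalars that have been vectorized are only *marked* as dead here; they
  // stay in the IR, so analyses such as ScalarEvolution never see a dangling
  // or undef operand while vectorization is still in progress.
  llvm::SmallPtrSet<llvm::Instruction *, 8> DeadInsts;

public:
  // Mark a single instruction; nothing is removed from its parent yet.
  void eraseInstruction(llvm::Instruction *I) { DeadInsts.insert(I); }

  // Mark every instruction in a list of values (non-instructions are skipped).
  void eraseInstructions(llvm::ArrayRef<llvm::Value *> Values) {
    for (llvm::Value *V : Values)
      if (auto *I = llvm::dyn_cast<llvm::Instruction>(V))
        eraseInstruction(I);
  }

  // Lets later scans of a block skip scalars that are already dead.
  bool isDeleted(llvm::Instruction *I) const { return DeadInsts.count(I); }

  // Physical removal happens once, at destruction: first drop all operands so
  // the marked instructions no longer use one another, then erase them.
  ~DeferredDeleter() {
    for (llvm::Instruction *I : DeadInsts)
      I->dropAllReferences();
    for (llvm::Instruction *I : DeadInsts) {
      assert(I->use_empty() && "erasing an instruction that still has users");
      I->eraseFromParent();
    }
  }
};

[Dropping all references before erasing anything makes the erase order irrelevant even when the marked instructions use each other.]
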
> Reviewers: mzolotukhin, mkuper, hfinkel, RKSimon, davide, spatel
>
> Subscribers: RKSimon, Gerolf, anemet, hans, majnemer, llvm-commits, sanjoy
>
> Differential Revision: https://reviews.llvm.org/D29641
>
> Added:
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll
> Modified:
>     llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
>     llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>     llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>
> Modified: llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h (original)
> +++ llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h Mon Sep
> 23 09:25:03 2019
> @@ -24,7 +24,6 @@
>  #include "llvm/ADT/SmallVector.h"
>  #include "llvm/Analysis/AliasAnalysis.h"
>  #include "llvm/IR/PassManager.h"
> -#include "llvm/IR/ValueHandle.h"
>
>  namespace llvm {
>
> @@ -60,8 +59,8 @@ extern cl::opt<bool> RunSLPVectorization
>  struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
>    using StoreList = SmallVector<StoreInst *, 8>;
>    using StoreListMap = MapVector<Value *, StoreList>;
> -  using WeakTrackingVHList = SmallVector<WeakTrackingVH, 8>;
> -  using WeakTrackingVHListMap = MapVector<Value *, WeakTrackingVHList>;
> +  using GEPList = SmallVector<GetElementPtrInst *, 8>;
> +  using GEPListMap = MapVector<Value *, GEPList>;
>
>    ScalarEvolution *SE = nullptr;
>    TargetTransformInfo *TTI = nullptr;
> @@ -131,7 +130,7 @@ private:
>
>    /// Tries to vectorize constructs started from CmpInst, InsertValueInst
> or
>    /// InsertElementInst instructions.
> -  bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
> +  bool vectorizeSimpleInstructions(SmallVectorImpl<Instruction *>
> &Instructions,
>                                     BasicBlock *BB, slpvectorizer::BoUpSLP
> &R);
>
>    /// Scan the basic block and look for patterns that are likely to start
> @@ -147,7 +146,7 @@ private:
>    StoreListMap Stores;
>
>    /// The getelementptr instructions in a basic block organized by base
> pointer.
> -  WeakTrackingVHListMap GEPs;
> +  GEPListMap GEPs;
>  };
>
>  } // end namespace llvm
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Mon Sep 23
> 09:25:03 2019
> @@ -1121,6 +1121,14 @@ public:
>  #endif
>    };
>
> +  /// Checks if the instruction is marked for deletion.
> +  bool isDeleted(Instruction *I) const { return
> DeletedInstructions.count(I); }
> +
> +  /// Marks values for later deletion.
> +  void eraseInstructions(ArrayRef<Value *> AV);
> +
> +  ~BoUpSLP();
> +
>  private:
>    /// Checks if all users of \p I are the part of the vectorization tree.
>    bool areAllUsersVectorized(Instruction *I) const;
> @@ -1491,14 +1499,12 @@ private:
>    /// AliasCache, which can happen if a new instruction is allocated at
> the
>    /// same address as a previously deleted instruction.
>    void eraseInstruction(Instruction *I) {
> -    I->removeFromParent();
> -    I->dropAllReferences();
> -    DeletedInstructions.emplace_back(I);
> +    DeletedInstructions.insert(I);
>    }
>
>    /// Temporary store for deleted instructions. Instructions will be
> deleted
>    /// eventually when the BoUpSLP is destructed.
> -  SmallVector<unique_value, 8> DeletedInstructions;
> +  SmallPtrSet<Instruction *, 8> DeletedInstructions;
>
>    /// A list of values that need to extracted out of the tree.
>    /// This list holds pairs of (Internal Scalar : External User).
> External User
> @@ -2055,6 +2061,22 @@ template <> struct DOTGraphTraits<BoUpSL
>
>  } // end namespace llvm
>
> +BoUpSLP::~BoUpSLP() {
> +  for (auto *I : DeletedInstructions)
> +    I->dropAllReferences();
> +  for (auto *I : DeletedInstructions) {
> +    assert(I->use_empty() && "trying to erase instruction with users.");
> +    I->eraseFromParent();
> +  }
> +}
> +
> +void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
> +  for (auto *V : AV) {
> +    if (auto *I = dyn_cast<Instruction>(V))
> +      eraseInstruction(I);
> +  };
> +}
> +
>  void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
>                          ArrayRef<Value *> UserIgnoreLst) {
>    ExtraValueToDebugLocsMap ExternallyUsedValues;
> @@ -3541,7 +3563,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *>
>    // Generate the 'InsertElement' instruction.
>    for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
>      Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
> -    if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
> +    if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
>        GatherSeq.insert(Insrt);
>        CSEBlocks.insert(Insrt->getParent());
>
> @@ -4290,20 +4312,18 @@ BoUpSLP::vectorizeTree(ExtraValueToDebug
>      for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
>        Value *Scalar = Entry->Scalars[Lane];
>
> +#ifndef NDEBUG
>        Type *Ty = Scalar->getType();
>        if (!Ty->isVoidTy()) {
> -#ifndef NDEBUG
>          for (User *U : Scalar->users()) {
>            LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
>
> -          // It is legal to replace users in the ignorelist by undef.
> +          // It is legal to delete users in the ignorelist.
>            assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
> -                 "Replacing out-of-tree value with undef");
> +                 "Deleting out-of-tree value");
>          }
> -#endif
> -        Value *Undef = UndefValue::get(Ty);
> -        Scalar->replaceAllUsesWith(Undef);
>        }
> +#endif
>        LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
>        eraseInstruction(cast<Instruction>(Scalar));
>      }
> @@ -4319,7 +4339,7 @@ void BoUpSLP::optimizeGatherSequence() {
>                      << " gather sequences instructions.\n");
>    // LICM InsertElementInst sequences.
>    for (Instruction *I : GatherSeq) {
> -    if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
> +    if (isDeleted(I))
>        continue;
>
>      // Check if this block is inside a loop.
> @@ -4373,6 +4393,8 @@ void BoUpSLP::optimizeGatherSequence() {
>      // For all instructions in blocks containing gather sequences:
>      for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
>        Instruction *In = &*it++;
> +      if (isDeleted(In))
> +        continue;
>        if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
>          continue;
>
> @@ -5255,19 +5277,6 @@ bool SLPVectorizerPass::runImpl(Function
>    return Changed;
>  }
>
> -/// Check that the Values in the slice in VL array are still existent in
> -/// the WeakTrackingVH array.
> -/// Vectorization of part of the VL array may cause later values in the
> VL array
> -/// to become invalid. We track when this has happened in the
> WeakTrackingVH
> -/// array.
> -static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
> -                               ArrayRef<WeakTrackingVH> VH, unsigned
> SliceBegin,
> -                               unsigned SliceSize) {
> -  VL = VL.slice(SliceBegin, SliceSize);
> -  VH = VH.slice(SliceBegin, SliceSize);
> -  return !std::equal(VL.begin(), VL.end(), VH.begin());
> -}
> -
>  bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
> BoUpSLP &R,
>                                              unsigned VecRegSize) {
>    const unsigned ChainLen = Chain.size();
> @@ -5279,20 +5288,20 @@ bool SLPVectorizerPass::vectorizeStoreCh
>    if (!isPowerOf2_32(Sz) || VF < 2)
>      return false;
>
> -  // Keep track of values that were deleted by vectorizing in the loop
> below.
> -  const SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(),
> Chain.end());
> -
>    bool Changed = false;
>    // Look for profitable vectorizable trees at all offsets, starting at
> zero.
>    for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) {
>
> +    ArrayRef<Value *> Operands = Chain.slice(i, VF);
>      // Check that a previous iteration of this loop did not delete the
> Value.
> -    if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
> +    if (llvm::any_of(Operands, [&R](Value *V) {
> +          auto *I = dyn_cast<Instruction>(V);
> +          return I && R.isDeleted(I);
> +        }))
>        continue;
>
>      LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset "
> << i
>                        << "\n");
> -    ArrayRef<Value *> Operands = Chain.slice(i, VF);
>
>      R.buildTree(Operands);
>      if (R.isTreeTinyAndNotFullyVectorizable())
> @@ -5484,9 +5493,6 @@ bool SLPVectorizerPass::tryToVectorizeLi
>    bool CandidateFound = false;
>    int MinCost = SLPCostThreshold;
>
> -  // Keep track of values that were deleted by vectorizing in the loop
> below.
> -  SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
> -
>    unsigned NextInst = 0, MaxInst = VL.size();
>    for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /=
> 2) {
>      // No actual vectorization should happen, if number of parts is the
> same as
> @@ -5506,13 +5512,16 @@ bool SLPVectorizerPass::tryToVectorizeLi
>        if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
>          break;
>
> +      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
>        // Check that a previous iteration of this loop did not delete the
> Value.
> -      if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
> +      if (llvm::any_of(Ops, [&R](Value *V) {
> +            auto *I = dyn_cast<Instruction>(V);
> +            return I && R.isDeleted(I);
> +          }))
>          continue;
>
>        LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
>                          << "\n");
> -      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
>
>        R.buildTree(Ops);
>        Optional<ArrayRef<unsigned>> Order = R.bestOrder();
> @@ -5733,23 +5742,23 @@ class HorizontalReduction {
>        case RK_Min:
>          Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS,
> RHS)
>                                            : Builder.CreateFCmpOLT(LHS,
> RHS);
> -        break;
> +        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>        case RK_Max:
>          Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS,
> RHS)
>                                            : Builder.CreateFCmpOGT(LHS,
> RHS);
> -        break;
> +        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>        case RK_UMin:
>          assert(Opcode == Instruction::ICmp && "Expected integer types.");
>          Cmp = Builder.CreateICmpULT(LHS, RHS);
> -        break;
> +        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>        case RK_UMax:
>          assert(Opcode == Instruction::ICmp && "Expected integer types.");
>          Cmp = Builder.CreateICmpUGT(LHS, RHS);
> -        break;
> +        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>        case RK_None:
> -        llvm_unreachable("Unknown reduction operation.");
> +        break;
>        }
> -      return Builder.CreateSelect(Cmp, LHS, RHS, Name);
> +      llvm_unreachable("Unknown reduction operation.");
>      }
>
>    public:
> @@ -6429,6 +6438,9 @@ public:
>        }
>        // Update users.
>        ReductionRoot->replaceAllUsesWith(VectorizedTree);
> +      // Mark all scalar reduction ops for deletion, they are replaced by
> the
> +      // vector reductions.
> +      V.eraseInstructions(IgnoreList);
>      }
>      return VectorizedTree != nullptr;
>    }
> @@ -6683,18 +6695,13 @@ static bool tryToVectorizeHorReductionOr
>    // horizontal reduction.
>    // Interrupt the process if the Root instruction itself was vectorized
> or all
>    // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized.
> -  SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0});
> +  SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
>    SmallPtrSet<Value *, 8> VisitedInstrs;
>    bool Res = false;
>    while (!Stack.empty()) {
> -    Value *V;
> +    Instruction *Inst;
>      unsigned Level;
> -    std::tie(V, Level) = Stack.pop_back_val();
> -    if (!V)
> -      continue;
> -    auto *Inst = dyn_cast<Instruction>(V);
> -    if (!Inst)
> -      continue;
> +    std::tie(Inst, Level) = Stack.pop_back_val();
>      auto *BI = dyn_cast<BinaryOperator>(Inst);
>      auto *SI = dyn_cast<SelectInst>(Inst);
>      if (BI || SI) {
> @@ -6735,8 +6742,8 @@ static bool tryToVectorizeHorReductionOr
>        for (auto *Op : Inst->operand_values())
>          if (VisitedInstrs.insert(Op).second)
>            if (auto *I = dyn_cast<Instruction>(Op))
> -            if (!isa<PHINode>(I) && I->getParent() == BB)
> -              Stack.emplace_back(Op, Level);
> +            if (!isa<PHINode>(I) && !R.isDeleted(I) && I->getParent() ==
> BB)
> +              Stack.emplace_back(I, Level);
>    }
>    return Res;
>  }
> @@ -6805,11 +6812,10 @@ bool SLPVectorizerPass::vectorizeCmpInst
>  }
>
>  bool SLPVectorizerPass::vectorizeSimpleInstructions(
> -    SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
> +    SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP
> &R) {
>    bool OpsChanged = false;
> -  for (auto &VH : reverse(Instructions)) {
> -    auto *I = dyn_cast_or_null<Instruction>(VH);
> -    if (!I)
> +  for (auto *I : reverse(Instructions)) {
> +    if (R.isDeleted(I))
>        continue;
>      if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
>        OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
> @@ -6838,7 +6844,7 @@ bool SLPVectorizerPass::vectorizeChainsI
>        if (!P)
>          break;
>
> -      if (!VisitedInstrs.count(P))
> +      if (!VisitedInstrs.count(P) && !R.isDeleted(P))
>          Incoming.push_back(P);
>      }
>
> @@ -6882,9 +6888,12 @@ bool SLPVectorizerPass::vectorizeChainsI
>
>    VisitedInstrs.clear();
>
> -  SmallVector<WeakVH, 8> PostProcessInstructions;
> +  SmallVector<Instruction *, 8> PostProcessInstructions;
>    SmallDenseSet<Instruction *, 4> KeyNodes;
>    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;
> ++it) {
> +    // Skip instructions marked for the deletion.
> +    if (R.isDeleted(&*it))
> +      continue;
>      // We may go through BB multiple times so skip the one we have
> checked.
>      if (!VisitedInstrs.insert(&*it).second) {
>        if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
> @@ -6977,10 +6986,10 @@ bool SLPVectorizerPass::vectorizeGEPIndi
>        SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
>
>        // Some of the candidates may have already been vectorized after we
> -      // initially collected them. If so, the WeakTrackingVHs will have
> -      // nullified the
> -      // values, so remove them from the set of candidates.
> -      Candidates.remove(nullptr);
> +      // initially collected them. If so, they are marked as deleted, so
> remove
> +      // them from the set of candidates.
> +      Candidates.remove_if(
> +          [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
>
>        // Remove from the set of candidates all pairs of getelementptrs
> with
>        // constant differences. Such getelementptrs are likely not good
> @@ -6988,18 +6997,18 @@ bool SLPVectorizerPass::vectorizeGEPIndi
>        // computed from the other. We also ensure all candidate
> getelementptr
>        // indices are unique.
>        for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1;
> ++I) {
> -        auto *GEPI = cast<GetElementPtrInst>(GEPList[I]);
> +        auto *GEPI = GEPList[I];
>          if (!Candidates.count(GEPI))
>            continue;
>          auto *SCEVI = SE->getSCEV(GEPList[I]);
>          for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
> -          auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]);
> +          auto *GEPJ = GEPList[J];
>            auto *SCEVJ = SE->getSCEV(GEPList[J]);
>            if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
> -            Candidates.remove(GEPList[I]);
> -            Candidates.remove(GEPList[J]);
> +            Candidates.remove(GEPI);
> +            Candidates.remove(GEPJ);
>            } else if (GEPI->idx_begin()->get() ==
> GEPJ->idx_begin()->get()) {
> -            Candidates.remove(GEPList[J]);
> +            Candidates.remove(GEPJ);
>            }
>          }
>        }
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll Mon
> Sep 23 09:25:03 2019
> @@ -17,16 +17,8 @@ define void @PR28330(i32 %n) {
>  ; DEFAULT:       for.body:
>  ; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]]
> ], [ 0, [[ENTRY:%.*]] ]
>  ; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32>
> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
> -80, i32 -80>
> -; DEFAULT-NEXT:    [[P20:%.*]] = add i32 [[P17]], undef
> -; DEFAULT-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
> -; DEFAULT-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
> -; DEFAULT-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
> -; DEFAULT-NEXT:    [[P28:%.*]] = add i32 [[P26]], undef
> -; DEFAULT-NEXT:    [[P30:%.*]] = add i32 [[P28]], undef
> -; DEFAULT-NEXT:    [[P32:%.*]] = add i32 [[P30]], undef
>  ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32
> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
>  ; DEFAULT-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
> -; DEFAULT-NEXT:    [[P34:%.*]] = add i32 [[P32]], undef
>  ; DEFAULT-NEXT:    br label [[FOR_BODY]]
>  ;
>  ; GATHER-LABEL: @PR28330(
> @@ -36,37 +28,30 @@ define void @PR28330(i32 %n) {
>  ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
>  ; GATHER:       for.body:
>  ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]]
> ], [ 0, [[ENTRY:%.*]] ]
> -; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
> -; GATHER-NEXT:    [[TMP3:%.*]] = insertelement <8 x i1> undef, i1
> [[TMP2]], i32 0
> -; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
> -; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1
> [[TMP4]], i32 1
> -; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
> -; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1
> [[TMP6]], i32 2
> -; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
> -; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1
> [[TMP8]], i32 3
> -; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
> -; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1
> [[TMP10]], i32 4
> -; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
> -; GATHER-NEXT:    [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1
> [[TMP12]], i32 5
> -; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
> -; GATHER-NEXT:    [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1
> [[TMP14]], i32 6
> -; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
> -; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1
> [[TMP16]], i32 7
> +; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
> +; GATHER-NEXT:    [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
> +; GATHER-NEXT:    [[TMP4:%.*]] = insertelement <8 x i1> undef, i1
> [[TMP3]], i32 0
> +; GATHER-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
> +; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1
> [[TMP5]], i32 1
> +; GATHER-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
> +; GATHER-NEXT:    [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1
> [[TMP7]], i32 2
> +; GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
> +; GATHER-NEXT:    [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1
> [[TMP9]], i32 3
> +; GATHER-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
> +; GATHER-NEXT:    [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1
> [[TMP11]], i32 4
> +; GATHER-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
> +; GATHER-NEXT:    [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1
> [[TMP13]], i32 5
> +; GATHER-NEXT:    [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
> +; GATHER-NEXT:    [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1
> [[TMP15]], i32 6
> +; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1
> [[TMP2]], i32 7
>  ; GATHER-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32>
> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
> -80, i32 -80>
>  ; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 0
> -; GATHER-NEXT:    [[P20:%.*]] = add i32 [[P17]], [[TMP19]]
>  ; GATHER-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 1
> -; GATHER-NEXT:    [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
>  ; GATHER-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 2
> -; GATHER-NEXT:    [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
>  ; GATHER-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 3
> -; GATHER-NEXT:    [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
>  ; GATHER-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 4
> -; GATHER-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
>  ; GATHER-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 5
> -; GATHER-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
>  ; GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 6
> -; GATHER-NEXT:    [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
>  ; GATHER-NEXT:    [[TMP26:%.*]] = insertelement <8 x i32> undef, i32
> [[TMP19]], i32 0
>  ; GATHER-NEXT:    [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32
> [[TMP20]], i32 1
>  ; GATHER-NEXT:    [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32
> [[TMP21]], i32 2
> @@ -78,7 +63,6 @@ define void @PR28330(i32 %n) {
>  ; GATHER-NEXT:    [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32
> [[TMP33]], i32 7
>  ; GATHER-NEXT:    [[TMP35:%.*]] = call i32
> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
>  ; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]]
> -; GATHER-NEXT:    [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
>  ; GATHER-NEXT:    br label [[FOR_BODY]]
>  ;
>  ; MAX-COST-LABEL: @PR28330(
> @@ -169,16 +153,8 @@ define void @PR32038(i32 %n) {
>  ; DEFAULT:       for.body:
>  ; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]]
> ], [ 0, [[ENTRY:%.*]] ]
>  ; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32>
> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
> -80, i32 -80>
> -; DEFAULT-NEXT:    [[P20:%.*]] = add i32 -5, undef
> -; DEFAULT-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
> -; DEFAULT-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
> -; DEFAULT-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
> -; DEFAULT-NEXT:    [[P28:%.*]] = add i32 [[P26]], undef
> -; DEFAULT-NEXT:    [[P30:%.*]] = add i32 [[P28]], undef
> -; DEFAULT-NEXT:    [[P32:%.*]] = add i32 [[P30]], undef
>  ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32
> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
>  ; DEFAULT-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], -5
> -; DEFAULT-NEXT:    [[P34:%.*]] = add i32 [[P32]], undef
>  ; DEFAULT-NEXT:    br label [[FOR_BODY]]
>  ;
>  ; GATHER-LABEL: @PR32038(
> @@ -188,37 +164,30 @@ define void @PR32038(i32 %n) {
>  ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
>  ; GATHER:       for.body:
>  ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]]
> ], [ 0, [[ENTRY:%.*]] ]
> -; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
> -; GATHER-NEXT:    [[TMP3:%.*]] = insertelement <8 x i1> undef, i1
> [[TMP2]], i32 0
> -; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
> -; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1
> [[TMP4]], i32 1
> -; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
> -; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1
> [[TMP6]], i32 2
> -; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
> -; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1
> [[TMP8]], i32 3
> -; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
> -; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1
> [[TMP10]], i32 4
> -; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
> -; GATHER-NEXT:    [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1
> [[TMP12]], i32 5
> -; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
> -; GATHER-NEXT:    [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1
> [[TMP14]], i32 6
> -; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
> -; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1
> [[TMP16]], i32 7
> +; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
> +; GATHER-NEXT:    [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
> +; GATHER-NEXT:    [[TMP4:%.*]] = insertelement <8 x i1> undef, i1
> [[TMP3]], i32 0
> +; GATHER-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
> +; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1
> [[TMP5]], i32 1
> +; GATHER-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
> +; GATHER-NEXT:    [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1
> [[TMP7]], i32 2
> +; GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
> +; GATHER-NEXT:    [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1
> [[TMP9]], i32 3
> +; GATHER-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
> +; GATHER-NEXT:    [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1
> [[TMP11]], i32 4
> +; GATHER-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
> +; GATHER-NEXT:    [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1
> [[TMP13]], i32 5
> +; GATHER-NEXT:    [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
> +; GATHER-NEXT:    [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1
> [[TMP15]], i32 6
> +; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1
> [[TMP2]], i32 7
>  ; GATHER-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32>
> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
> -80, i32 -80>
>  ; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 0
> -; GATHER-NEXT:    [[P20:%.*]] = add i32 -5, [[TMP19]]
>  ; GATHER-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 1
> -; GATHER-NEXT:    [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
>  ; GATHER-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 2
> -; GATHER-NEXT:    [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
>  ; GATHER-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 3
> -; GATHER-NEXT:    [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
>  ; GATHER-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 4
> -; GATHER-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
>  ; GATHER-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 5
> -; GATHER-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
>  ; GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32
> 6
> -; GATHER-NEXT:    [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
>  ; GATHER-NEXT:    [[TMP26:%.*]] = insertelement <8 x i32> undef, i32
> [[TMP19]], i32 0
>  ; GATHER-NEXT:    [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32
> [[TMP20]], i32 1
>  ; GATHER-NEXT:    [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32
> [[TMP21]], i32 2
> @@ -230,7 +199,6 @@ define void @PR32038(i32 %n) {
>  ; GATHER-NEXT:    [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32
> [[TMP33]], i32 7
>  ; GATHER-NEXT:    [[TMP35:%.*]] = call i32
> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
>  ; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP35]], -5
> -; GATHER-NEXT:    [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
>  ; GATHER-NEXT:    br label [[FOR_BODY]]
>  ;
>  ; MAX-COST-LABEL: @PR32038(
> @@ -259,18 +227,12 @@ define void @PR32038(i32 %n) {
>  ; MAX-COST-NEXT:    [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1
> [[P5]], i32 2
>  ; MAX-COST-NEXT:    [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1
> [[P7]], i32 3
>  ; MAX-COST-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32>
> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32
> -80, i32 -80>
> -; MAX-COST-NEXT:    [[P20:%.*]] = add i32 -5, undef
> -; MAX-COST-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
> -; MAX-COST-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
> -; MAX-COST-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
>  ; MAX-COST-NEXT:    [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
> -; MAX-COST-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[P27]]
>  ; MAX-COST-NEXT:    [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
>  ; MAX-COST-NEXT:    [[TMP9:%.*]] = call i32
> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
>  ; MAX-COST-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]]
>  ; MAX-COST-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]]
>  ; MAX-COST-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5
> -; MAX-COST-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[P29]]
>  ; MAX-COST-NEXT:    [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
>  ; MAX-COST-NEXT:    [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
>  ; MAX-COST-NEXT:    [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll Mon Sep
> 23 09:25:03 2019
> @@ -46,12 +46,8 @@ define i32 @test_select(i32* noalias noc
>  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]],
> zeroinitializer
>  ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer,
> [[TMP4]]
>  ; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32>
> [[TMP6]], <4 x i32> [[TMP4]]
> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_026]]
> -; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD]], undef
> -; CHECK-NEXT:    [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef
>  ; CHECK-NEXT:    [[TMP8:%.*]] = call i32
> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
>  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
> -; CHECK-NEXT:    [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef
>  ; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32*
> [[P1_023]], i64 [[IDX_EXT]]
>  ; CHECK-NEXT:    [[ADD_PTR29]] = getelementptr inbounds i32, i32*
> [[P2_024]], i64 [[IDX_EXT]]
>  ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_025]], 1
> @@ -173,12 +169,8 @@ define i32 @reduction_with_br(i32* noali
>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>*
>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]],
> align 4
>  ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_020]]
> -; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD]], undef
> -; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef
>  ; CHECK-NEXT:    [[TMP5:%.*]] = call i32
> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
>  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
> -; CHECK-NEXT:    [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef
>  ; CHECK-NEXT:    [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
>  ; CHECK-NEXT:    br i1 [[CMP14]], label [[IF_END]], label
> [[FOR_END_LOOPEXIT:%.*]]
>  ; CHECK:       if.end:
> @@ -293,16 +285,8 @@ define i32 @test_unrolled_select(i8* noa
>  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]],
> zeroinitializer
>  ; CHECK-NEXT:    [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer,
> [[TMP6]]
>  ; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32>
> [[TMP8]], <8 x i32> [[TMP6]]
> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_047]]
> -; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[ADD]], undef
> -; CHECK-NEXT:    [[ADD27:%.*]] = add nsw i32 [[ADD16]], undef
> -; CHECK-NEXT:    [[ADD38:%.*]] = add nsw i32 [[ADD27]], undef
> -; CHECK-NEXT:    [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef
> -; CHECK-NEXT:    [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef
> -; CHECK-NEXT:    [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef
>  ; CHECK-NEXT:    [[TMP10:%.*]] = call i32
> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
>  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
> -; CHECK-NEXT:    [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef
>  ; CHECK-NEXT:    [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
>  ; CHECK-NEXT:    br i1 [[CMP83]], label [[IF_END_86]], label
> [[FOR_END_LOOPEXIT:%.*]]
>  ; CHECK:       if.end.86:
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll Mon
> Sep 23 09:25:03 2019
> @@ -13,11 +13,11 @@ define void @patatino(i64 %n, i64 %i, %s
>  ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[I:%.*]],
> metadata !19, metadata !DIExpression()), !dbg !24
>  ; CHECK-NEXT:    call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]],
> metadata !20, metadata !DIExpression()), !dbg !25
>  ; CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]],
> %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26
> -; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata
> !21, metadata !DIExpression()), !dbg !27
> +; CHECK-NEXT:    call void @llvm.dbg.value(metadata !2, metadata !21,
> metadata !DIExpression()), !dbg !27
>  ; CHECK-NEXT:    [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]],
> %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg
> !26
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]],
> align 8, !dbg !26, !tbaa !29
> -; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata
> !22, metadata !DIExpression()), !dbg !33
> +; CHECK-NEXT:    call void @llvm.dbg.value(metadata !2, metadata !22,
> metadata !DIExpression()), !dbg !33
>  ; CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]],
> %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34
>  ; CHECK-NEXT:    [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]],
> %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35
>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg
> !36
>
> Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll?rev=372626&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll (added)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll Mon Sep 23
> 09:25:03 2019
> @@ -0,0 +1,153 @@
> +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
> +; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s |
> FileCheck %s
> +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
> +
> + at shift = common local_unnamed_addr global [10 x i32] zeroinitializer,
> align 4
> + at data = common local_unnamed_addr global [10 x i8*] zeroinitializer,
> align 4
> +
> +define void @flat(i32 %intensity) {
> +; CHECK-LABEL: @flat(
> +; CHECK-NEXT:  entry:
> +; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds
> ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
> +; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds
> ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
> +; CHECK-NEXT:    [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds
> ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
> +; CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds
> ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
> +; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 1, [[TMP0]]
> +; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8*
> [[TMP2]], i32 [[SHR]]
> +; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 1, [[TMP1]]
> +; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8*
> [[TMP3]], i32 [[SHR1]]
> +; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
> +; CHECK:       for.cond.cleanup:
> +; CHECK-NEXT:    ret void
> +; CHECK:       for.body:
> +; CHECK-NEXT:    [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]]
> ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
> +; CHECK-NEXT:    [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [
> [[INC_1:%.*]], [[FOR_BODY]] ]
> +; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
> +; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
> +; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[CONV]], -128
> +; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
> +; CHECK-NEXT:    [[CONV3:%.*]] = zext i8 [[TMP5]] to i32
> +; CHECK-NEXT:    [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128
> +; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1
> +; CHECK-NEXT:    [[SUB7:%.*]] = sub nsw i32 128, [[CONV]]
> +; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP5]], i32 [[SUB]], i32
> [[SUB7]]
> +; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[SUB4]], -1
> +; CHECK-NEXT:    [[SUB12:%.*]] = sub nsw i32 128, [[CONV3]]
> +; CHECK-NEXT:    [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32
> [[SUB12]]
> +; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]]
> +; CHECK-NEXT:    [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]]
> +; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8*
> [[D1_DATA_046]], i32 [[IDX_NEG]]
> +; CHECK-NEXT:    [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1
> +; CHECK-NEXT:    [[CONV15:%.*]] = zext i8 [[TMP6]] to i32
> +; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]]
> +; CHECK-NEXT:    [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8
> +; CHECK-NEXT:    store i8 [[CONV17]], i8* [[ADD_PTR]], align 1
> +; CHECK-NEXT:    [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8*
> [[D1_DATA_046]], i32 [[ADD]]
> +; CHECK-NEXT:    [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1
> +; CHECK-NEXT:    [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0
> +; CHECK-NEXT:    [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8
> +; CHECK-NEXT:    store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1
> +; CHECK-NEXT:    [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8*
> [[D1_DATA_046]], i32 [[TMP1]]
> +; CHECK-NEXT:    [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
> +; CHECK-NEXT:    [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32
> +; CHECK-NEXT:    [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128
> +; CHECK-NEXT:    [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
> +; CHECK-NEXT:    [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32
> +; CHECK-NEXT:    [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128
> +; CHECK-NEXT:    [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1
> +; CHECK-NEXT:    [[SUB7_1:%.*]] = sub nsw i32 128, [[CONV_1]]
> +; CHECK-NEXT:    [[COND_1:%.*]] = select i1 [[CMP5_1]], i32 [[SUB_1]],
> i32 [[SUB7_1]]
> +; CHECK-NEXT:    [[CMP8_1:%.*]] = icmp sgt i32 [[SUB4_1]], -1
> +; CHECK-NEXT:    [[SUB12_1:%.*]] = sub nsw i32 128, [[CONV3_1]]
> +; CHECK-NEXT:    [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32 [[SUB4_1]],
> i32 [[SUB12_1]]
> +; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]]
> +; CHECK-NEXT:    [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]]
> +; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8*
> [[ADD_PTR23]], i32 [[IDX_NEG_1]]
> +; CHECK-NEXT:    [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1
> +; CHECK-NEXT:    [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32
> +; CHECK-NEXT:    [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]]
> +; CHECK-NEXT:    [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8
> +; CHECK-NEXT:    store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1
> +; CHECK-NEXT:    [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8*
> [[ADD_PTR23]], i32 [[ADD_1]]
> +; CHECK-NEXT:    [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1
> +; CHECK-NEXT:    [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0
> +; CHECK-NEXT:    [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8
> +; CHECK-NEXT:    store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1
> +; CHECK-NEXT:    [[ADD_PTR23_1]] = getelementptr inbounds i8, i8*
> [[ADD_PTR23]], i32 [[TMP1]]
> +; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[Y_045]], 2
> +; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128
> +; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]],
> label [[FOR_BODY]]
> +;
> +entry:
> +  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]*
> @shift, i32 0, i32 0), align 4
> +  %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]*
> @shift, i32 0, i32 1), align 4
> +  %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]*
> @data, i32 0, i32 0), align 4
> +  %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]*
> @data, i32 0, i32 1), align 4
> +  %shr = lshr i32 1, %0
> +  %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr
> +  %shr1 = lshr i32 1, %1
> +  %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1
> +  br label %for.body
> +
> +for.cond.cleanup:                                 ; preds = %for.body
> +  ret void
> +
> +for.body:                                         ; preds = %for.body,
> %entry
> +  %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ]
> +  %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ]
> +  %4 = load i8, i8* %arrayidx, align 1
> +  %conv = zext i8 %4 to i32
> +  %sub = add nsw i32 %conv, -128
> +  %5 = load i8, i8* %arrayidx2, align 1
> +  %conv3 = zext i8 %5 to i32
> +  %sub4 = add nsw i32 %conv3, -128
> +  %cmp5 = icmp sgt i32 %sub, -1
> +  %sub7 = sub nsw i32 128, %conv
> +  %cond = select i1 %cmp5, i32 %sub, i32 %sub7
> +  %cmp8 = icmp sgt i32 %sub4, -1
> +  %sub12 = sub nsw i32 128, %conv3
> +  %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12
> +  %add = add nsw i32 %cond14, %cond
> +  %idx.neg = sub nsw i32 0, %add
> +  %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg
> +  %6 = load i8, i8* %add.ptr, align 1
> +  %conv15 = zext i8 %6 to i32
> +  %add16 = add nsw i32 %conv15, %intensity
> +  %conv17 = trunc i32 %add16 to i8
> +  store i8 %conv17, i8* %add.ptr, align 1
> +  %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add
> +  %7 = load i8, i8* %add.ptr18, align 1
> +  %not.tobool = icmp eq i8 %7, 0
> +  %conv21 = zext i1 %not.tobool to i8
> +  store i8 %conv21, i8* %add.ptr18, align 1
> +  %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1
> +  %8 = load i8, i8* %arrayidx, align 1
> +  %conv.1 = zext i8 %8 to i32
> +  %sub.1 = add nsw i32 %conv.1, -128
> +  %9 = load i8, i8* %arrayidx2, align 1
> +  %conv3.1 = zext i8 %9 to i32
> +  %sub4.1 = add nsw i32 %conv3.1, -128
> +  %cmp5.1 = icmp sgt i32 %sub.1, -1
> +  %sub7.1 = sub nsw i32 128, %conv.1
> +  %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1
> +  %cmp8.1 = icmp sgt i32 %sub4.1, -1
> +  %sub12.1 = sub nsw i32 128, %conv3.1
> +  %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1
> +  %add.1 = add nsw i32 %cond14.1, %cond.1
> +  %idx.neg.1 = sub nsw i32 0, %add.1
> +  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1
> +  %10 = load i8, i8* %add.ptr.1, align 1
> +  %conv15.1 = zext i8 %10 to i32
> +  %add16.1 = add nsw i32 %conv15.1, %intensity
> +  %conv17.1 = trunc i32 %add16.1 to i8
> +  store i8 %conv17.1, i8* %add.ptr.1, align 1
> +  %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1
> +  %11 = load i8, i8* %add.ptr18.1, align 1
> +  %not.tobool.1 = icmp eq i8 %11, 0
> +  %conv21.1 = zext i1 %not.tobool.1 to i8
> +  store i8 %conv21.1, i8* %add.ptr18.1, align 1
> +  %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1
> +  %inc.1 = add nsw i32 %y.045, 2
> +  %exitcond.1 = icmp eq i32 %inc.1, 128
> +  br i1 %exitcond.1, label %for.cond.cleanup, label %for.body
> +}
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll Mon Sep 23
> 09:25:03 2019
> @@ -18,23 +18,16 @@ define void @mainTest(i32* %ptr) #0  {
>  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
>  ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
>  ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
> -; CHECK-NEXT:    [[TMP9:%.*]] = add i32 1, undef
> -; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP7]]
> -; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], undef
> -; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP6]]
> -; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], undef
> -; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
> -; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP5]]
> +; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4
> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]],
> <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]],
> [[RDX_SHUF1]]
> -; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
> i32 0
> -; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
> +; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
> i32 0
> +; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1
>  ; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
>  ; CHECK-NEXT:    [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
>  ; CHECK-NEXT:    [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
> -; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP15]], undef
>  ; CHECK-NEXT:    br label [[LOOP]]
>  ; CHECK:       bail_out:
>  ; CHECK-NEXT:    ret void
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll Mon Sep 23
> 09:25:03 2019
> @@ -20,10 +20,6 @@ define void @test() #0 {
>  ; CHECK-NEXT:    [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
>  ; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64
> 1>, [[TMP5]]
>  ; CHECK-NEXT:    [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32,
> i64 32, i64 32, i64 32>
> -; CHECK-NEXT:    [[SUM1:%.*]] = add i64 undef, undef
> -; CHECK-NEXT:    [[SUM2:%.*]] = add i64 [[SUM1]], undef
> -; CHECK-NEXT:    [[ZSUM:%.*]] = add i64 [[SUM2]], 0
> -; CHECK-NEXT:    [[JOIN:%.*]] = add i64 [[TMP6]], [[ZSUM]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4
> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]],
> <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> @@ -31,7 +27,6 @@ define void @test() #0 {
>  ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
>  ; CHECK-NEXT:    [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
> -; CHECK-NEXT:    [[LAST:%.*]] = add i64 [[JOIN]], undef
>  ; CHECK-NEXT:    br label [[LOOP]]
>  ;
>  entry:
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll Mon Sep 23
> 09:25:03 2019
> @@ -11,40 +11,6 @@ define void @Test(i32) {
>  ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x
> i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
> 1>
>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32
> 1
>  ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32
> 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
> -; CHECK-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
> -; CHECK-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
> -; CHECK-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
> -; CHECK-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
> -; CHECK-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
> -; CHECK-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
> -; CHECK-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
> -; CHECK-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -52,7 +18,7 @@ define void @Test(i32) {
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
> +; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]]
>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
>  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
>  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
> @@ -79,7 +45,6 @@ define void @Test(i32) {
>  ; CHECK-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
>  ; CHECK-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
>  ; CHECK-NEXT:    [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
> -; CHECK-NEXT:    [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
>  ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> undef, i32
> [[OP_EXTRA30]], i32 0
>  ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32
> 14910, i32 1
>  ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32
> [[TMP2]], i32 0
> @@ -101,40 +66,8 @@ define void @Test(i32) {
>  ; FORCE_REDUCTION-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32>
> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
>  ; FORCE_REDUCTION-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32>
> [[SHUFFLE]], i32 1
>  ; FORCE_REDUCTION-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], <i32
> 0, i32 55, i32 285, i32 1240>
> -; FORCE_REDUCTION-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
> -; FORCE_REDUCTION-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
> -; FORCE_REDUCTION-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
> -; FORCE_REDUCTION-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
>  ; FORCE_REDUCTION-NEXT:    [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
> -; FORCE_REDUCTION-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
>  ; FORCE_REDUCTION-NEXT:    [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
> -; FORCE_REDUCTION-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
>  ; FORCE_REDUCTION-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32>
> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; FORCE_REDUCTION-NEXT:    [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]],
> [[RDX_SHUF]]
>  ; FORCE_REDUCTION-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
> @@ -142,7 +75,7 @@ define void @Test(i32) {
>  ; FORCE_REDUCTION-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32>
> [[BIN_RDX2]], i32 0
>  ; FORCE_REDUCTION-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
>  ; FORCE_REDUCTION-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
> -; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
> +; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP6]],
> [[TMP0:%.*]]
>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]],
> [[TMP0]]
>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]],
> [[TMP0]]
>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]],
> [[TMP0]]
> @@ -170,7 +103,6 @@ define void @Test(i32) {
>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]],
> [[TMP0]]
>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]],
> [[TMP0]]
>  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]],
> [[TMP2]]
> -; FORCE_REDUCTION-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
>  ; FORCE_REDUCTION-NEXT:    [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
>  ; FORCE_REDUCTION-NEXT:    [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]],
> [[VAL_39]]
>  ; FORCE_REDUCTION-NEXT:    [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll Mon Sep 23
> 09:25:03 2019
> @@ -13,21 +13,6 @@ define void @mainTest(i32 %param, i32 *
>  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]],
> i32 15
>  ; CHECK-NEXT:    store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered,
> align 4
>  ; CHECK-NEXT:    [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 15, i32
> 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32
> 4, i32 3, i32 2, i32 1, i32 -1>
> -; CHECK-NEXT:    [[V14:%.*]] = and i32 [[TMP2]], undef
> -; CHECK-NEXT:    [[V16:%.*]] = and i32 undef, [[V14]]
> -; CHECK-NEXT:    [[V18:%.*]] = and i32 undef, [[V16]]
> -; CHECK-NEXT:    [[V20:%.*]] = and i32 undef, [[V18]]
> -; CHECK-NEXT:    [[V22:%.*]] = and i32 undef, [[V20]]
> -; CHECK-NEXT:    [[V24:%.*]] = and i32 undef, [[V22]]
> -; CHECK-NEXT:    [[V26:%.*]] = and i32 undef, [[V24]]
> -; CHECK-NEXT:    [[V28:%.*]] = and i32 undef, [[V26]]
> -; CHECK-NEXT:    [[V30:%.*]] = and i32 undef, [[V28]]
> -; CHECK-NEXT:    [[V32:%.*]] = and i32 undef, [[V30]]
> -; CHECK-NEXT:    [[V34:%.*]] = and i32 undef, [[V32]]
> -; CHECK-NEXT:    [[V36:%.*]] = and i32 undef, [[V34]]
> -; CHECK-NEXT:    [[V38:%.*]] = and i32 undef, [[V36]]
> -; CHECK-NEXT:    [[V40:%.*]] = and i32 undef, [[V38]]
> -; CHECK-NEXT:    [[V42:%.*]] = and i32 undef, [[V40]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]],
> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i32>
> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -38,7 +23,6 @@ define void @mainTest(i32 %param, i32 *
>  ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]],
> [[RDX_SHUF5]]
>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
> -; CHECK-NEXT:    [[V43:%.*]] = and i32 undef, [[V42]]
>  ; CHECK-NEXT:    [[V44:%.*]] = add i32 [[TMP2]], 16
>  ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> undef, i32
> [[V44]], i32 0
>  ; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32
> [[OP_EXTRA]], i32 1
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll Mon Sep
> 23 09:25:03 2019
> @@ -30,12 +30,6 @@ define i64 @load_bswap(%v8i8* %p) {
>  ; CHECK-NEXT:    [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24
>  ; CHECK-NEXT:    [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16
>  ; CHECK-NEXT:    [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8
> -; CHECK-NEXT:    [[OR01:%.*]] = or i64 undef, undef
> -; CHECK-NEXT:    [[OR012:%.*]] = or i64 [[OR01]], undef
> -; CHECK-NEXT:    [[OR0123:%.*]] = or i64 [[OR012]], undef
> -; CHECK-NEXT:    [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]]
> -; CHECK-NEXT:    [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]]
> -; CHECK-NEXT:    [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4
> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]],
> <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> @@ -45,7 +39,6 @@ define i64 @load_bswap(%v8i8* %p) {
>  ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]]
>  ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]]
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]]
> -; CHECK-NEXT:    [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]]
>  ; CHECK-NEXT:    ret i64 [[OP_EXTRA]]
>  ;
>    %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
> @@ -108,12 +101,6 @@ define i64 @load_bswap_nop_shift(%v8i8*
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
>  ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
>  ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 56, i64
> 48, i64 40, i64 32, i64 24, i64 16, i64 8, i64 0>
> -; CHECK-NEXT:    [[OR01:%.*]] = or i64 undef, undef
> -; CHECK-NEXT:    [[OR012:%.*]] = or i64 [[OR01]], undef
> -; CHECK-NEXT:    [[OR0123:%.*]] = or i64 [[OR012]], undef
> -; CHECK-NEXT:    [[OR01234:%.*]] = or i64 [[OR0123]], undef
> -; CHECK-NEXT:    [[OR012345:%.*]] = or i64 [[OR01234]], undef
> -; CHECK-NEXT:    [[OR0123456:%.*]] = or i64 [[OR012345]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8
> x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]],
> <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -121,7 +108,6 @@ define i64 @load_bswap_nop_shift(%v8i8*
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i64>
> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[OR01234567:%.*]] = or i64 [[OR0123456]], undef
>  ; CHECK-NEXT:    ret i64 [[TMP5]]
>  ;
>    %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
> @@ -196,12 +182,6 @@ define i64 @load64le(i8* %arg) {
>  ; CHECK-NEXT:    [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40
>  ; CHECK-NEXT:    [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48
>  ; CHECK-NEXT:    [[S7:%.*]] = shl nuw i64 [[Z7]], 56
> -; CHECK-NEXT:    [[O1:%.*]] = or i64 undef, [[Z0]]
> -; CHECK-NEXT:    [[O2:%.*]] = or i64 [[O1]], undef
> -; CHECK-NEXT:    [[O3:%.*]] = or i64 [[O2]], undef
> -; CHECK-NEXT:    [[O4:%.*]] = or i64 [[O3]], undef
> -; CHECK-NEXT:    [[O5:%.*]] = or i64 [[O4]], [[S5]]
> -; CHECK-NEXT:    [[O6:%.*]] = or i64 [[O5]], [[S6]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4
> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]],
> <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> @@ -211,7 +191,6 @@ define i64 @load64le(i8* %arg) {
>  ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]]
>  ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]]
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]]
> -; CHECK-NEXT:    [[O7:%.*]] = or i64 [[O6]], [[S7]]
>  ; CHECK-NEXT:    ret i64 [[OP_EXTRA]]
>  ;
>    %g1 = getelementptr inbounds i8, i8* %arg, i64 1
> @@ -272,12 +251,6 @@ define i64 @load64le_nop_shift(i8* %arg)
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
>  ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
>  ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 0, i64
> 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56>
> -; CHECK-NEXT:    [[O1:%.*]] = or i64 undef, undef
> -; CHECK-NEXT:    [[O2:%.*]] = or i64 [[O1]], undef
> -; CHECK-NEXT:    [[O3:%.*]] = or i64 [[O2]], undef
> -; CHECK-NEXT:    [[O4:%.*]] = or i64 [[O3]], undef
> -; CHECK-NEXT:    [[O5:%.*]] = or i64 [[O4]], undef
> -; CHECK-NEXT:    [[O6:%.*]] = or i64 [[O5]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8
> x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]],
> <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -285,7 +258,6 @@ define i64 @load64le_nop_shift(i8* %arg)
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i64>
> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[O7:%.*]] = or i64 [[O6]], undef
>  ; CHECK-NEXT:    ret i64 [[TMP5]]
>  ;
>    %g1 = getelementptr inbounds i8, i8* %arg, i64 1
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll Mon
> Sep 23 09:25:03 2019
> @@ -100,16 +100,8 @@ define float @bazz() {
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast
> ([20 x float]* @arr to <8 x float>*), align 16
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast
> ([20 x float]* @arr1 to <8 x float>*), align 16
>  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
> -; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
> -; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>  ; CHECK-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
>  ; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
> -; CHECK-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
> -; CHECK-NEXT:    [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
> -; CHECK-NEXT:    [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
> -; CHECK-NEXT:    [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]],
> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -119,7 +111,6 @@ define float @bazz() {
>  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
> [[CONV6]]
> -; CHECK-NEXT:    [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
>  ; CHECK-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
>  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
>  ;
> @@ -131,16 +122,8 @@ define float @bazz() {
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>*
> bitcast ([20 x float]* @arr to <8 x float>*), align 16
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>*
> bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]],
> [[TMP1]]
> -; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
> -; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>  ; THRESHOLD-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
>  ; THRESHOLD-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
> -; THRESHOLD-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
> -; THRESHOLD-NEXT:    [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
> -; THRESHOLD-NEXT:    [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
> -; THRESHOLD-NEXT:    [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float>
> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -150,7 +133,6 @@ define float @bazz() {
>  ; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <8 x float>
> [[BIN_RDX4]], i32 0
>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
>  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
> [[CONV6]]
> -; THRESHOLD-NEXT:    [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
>  ; THRESHOLD-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
>  ;
> @@ -205,17 +187,14 @@ define float @bazzz() {
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
> ([20 x float]* @arr to <4 x float>*), align 16
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast
> ([20 x float]* @arr1 to <4 x float>*), align 16
>  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
> -; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
> -; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
> -; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> -; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
> -; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
> -; CHECK-NEXT:    store float [[TMP8]], float* @res, align 4
> -; CHECK-NEXT:    ret float [[TMP8]]
> +; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> +; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
> +; CHECK-NEXT:    store float [[TMP5]], float* @res, align 4
> +; CHECK-NEXT:    ret float [[TMP5]]
>  ;
>  ; THRESHOLD-LABEL: @bazzz(
>  ; THRESHOLD-NEXT:  entry:
> @@ -224,17 +203,14 @@ define float @bazzz() {
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>*
> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>*
> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]],
> [[TMP1]]
> -; THRESHOLD-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
> -; THRESHOLD-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float>
> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float>
> [[BIN_RDX]], [[RDX_SHUF1]]
> -; THRESHOLD-NEXT:    [[TMP6:%.*]] = extractelement <4 x float>
> [[BIN_RDX2]], i32 0
> -; THRESHOLD-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
> -; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
> -; THRESHOLD-NEXT:    store float [[TMP8]], float* @res, align 4
> -; THRESHOLD-NEXT:    ret float [[TMP8]]
> +; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <4 x float>
> [[BIN_RDX2]], i32 0
> +; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
> +; THRESHOLD-NEXT:    store float [[TMP5]], float* @res, align 4
> +; THRESHOLD-NEXT:    ret float [[TMP5]]
>  ;
>  entry:
>    %0 = load i32, i32* @n, align 4
> @@ -267,16 +243,13 @@ define i32 @foo() {
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
> ([20 x float]* @arr to <4 x float>*), align 16
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast
> ([20 x float]* @arr1 to <4 x float>*), align 16
>  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
> -; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
> -; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
> -; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> -; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
> -; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
> -; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
> +; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> +; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
> +; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
>  ; CHECK-NEXT:    store i32 [[CONV4]], i32* @n, align 4
>  ; CHECK-NEXT:    ret i32 [[CONV4]]
>  ;
> @@ -287,16 +260,13 @@ define i32 @foo() {
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>*
> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>*
> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]],
> [[TMP1]]
> -; THRESHOLD-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
> -; THRESHOLD-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float>
> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float>
> [[BIN_RDX]], [[RDX_SHUF1]]
> -; THRESHOLD-NEXT:    [[TMP6:%.*]] = extractelement <4 x float>
> [[BIN_RDX2]], i32 0
> -; THRESHOLD-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
> -; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
> -; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
> +; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <4 x float>
> [[BIN_RDX2]], i32 0
> +; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
> +; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
>  ; THRESHOLD-NEXT:    store i32 [[CONV4]], i32* @n, align 4
>  ; THRESHOLD-NEXT:    ret i32 [[CONV4]]
>  ;
> @@ -330,11 +300,6 @@ define float @bar() {
>  ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast
> ([20 x float]* @arr to <4 x float>*), align 16
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
> ([20 x float]* @arr1 to <4 x float>*), align 16
>  ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
> -; CHECK-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float undef, undef
> -; CHECK-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef,
> float undef
> -; CHECK-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]],
> undef
> -; CHECK-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float
> [[MAX_0_MUL3]], float undef
> -; CHECK-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]],
> undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float>
> [[TMP2]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
> @@ -342,7 +307,6 @@ define float @bar() {
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float>
> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float>
> [[RDX_SHUF1]]
>  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float>
> [[RDX_MINMAX_SELECT3]], i32 0
> -; CHECK-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float
> [[MAX_0_MUL3_1]], float undef
>  ; CHECK-NEXT:    store float [[TMP3]], float* @res, align 4
>  ; CHECK-NEXT:    ret float [[TMP3]]
>  ;
> @@ -351,11 +315,6 @@ define float @bar() {
>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>*
> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>*
> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]],
> [[TMP0]]
> -; THRESHOLD-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float undef, undef
> -; THRESHOLD-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float
> undef, float undef
> -; THRESHOLD-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]],
> undef
> -; THRESHOLD-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float
> [[MAX_0_MUL3]], float undef
> -; THRESHOLD-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float
> [[MAX_0_MUL3_1]], undef
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float>
> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float>
> [[TMP2]], [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
> @@ -363,7 +322,6 @@ define float @bar() {
>  ; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float>
> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>  ; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float>
> [[RDX_SHUF1]]
>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = extractelement <4 x float>
> [[RDX_MINMAX_SELECT3]], i32 0
> -; THRESHOLD-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float
> [[MAX_0_MUL3_1]], float undef
>  ; THRESHOLD-NEXT:    store float [[TMP3]], float* @res, align 4
>  ; THRESHOLD-NEXT:    ret float [[TMP3]]
>  ;
> @@ -410,21 +368,6 @@ define float @f(float* nocapture readonl
>  ; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 15
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>*
> [[TMP0]], align 4
> -; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, undef
> -; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
> -; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
> -; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
> -; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
> -; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
> -; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
> -; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
> -; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
> -; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
> -; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
> -; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
> -; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
> -; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>  ; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 16
>  ; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 17
>  ; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 18
> @@ -459,37 +402,6 @@ define float @f(float* nocapture readonl
>  ; CHECK-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 47
>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x
> float>*
>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>*
> [[TMP2]], align 4
> -; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
> -; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
> -; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
> -; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
> -; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
> -; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
> -; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
> -; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
> -; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
> -; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
> -; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
> -; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
> -; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
> -; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
> -; CHECK-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
> -; CHECK-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
> -; CHECK-NEXT:    [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
> -; CHECK-NEXT:    [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
> -; CHECK-NEXT:    [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
> -; CHECK-NEXT:    [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
> -; CHECK-NEXT:    [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
> -; CHECK-NEXT:    [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
> -; CHECK-NEXT:    [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
> -; CHECK-NEXT:    [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
> -; CHECK-NEXT:    [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
> -; CHECK-NEXT:    [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
> -; CHECK-NEXT:    [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
> -; CHECK-NEXT:    [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
> -; CHECK-NEXT:    [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
> -; CHECK-NEXT:    [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
> -; CHECK-NEXT:    [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]],
> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -511,7 +423,6 @@ define float @f(float* nocapture readonl
>  ; CHECK-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float>
> [[BIN_RDX14]], [[RDX_SHUF15]]
>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float>
> [[BIN_RDX16]], i32 0
>  ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
> -; CHECK-NEXT:    [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
>  ; CHECK-NEXT:    ret float [[OP_RDX]]
>  ;
>  ; THRESHOLD-LABEL: @f(
> @@ -533,21 +444,6 @@ define float @f(float* nocapture readonl
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 15
>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>*
> [[TMP0]], align 4
> -; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, undef
> -; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
> -; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
> -; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
> -; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
> -; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
> -; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
> -; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
> -; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
> -; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
> -; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
> -; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
> -; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
> -; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 16
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 17
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 18
> @@ -582,37 +478,6 @@ define float @f(float* nocapture readonl
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 47
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32
> x float>*
>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>*
> [[TMP2]], align 4
> -; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
> -; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
> -; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
> -; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
> -; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
> -; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
> -; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
> -; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
> -; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
> -; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
> -; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
> -; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
> -; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
> -; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
> -; THRESHOLD-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
> -; THRESHOLD-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
> -; THRESHOLD-NEXT:    [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
> -; THRESHOLD-NEXT:    [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
> -; THRESHOLD-NEXT:    [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
> -; THRESHOLD-NEXT:    [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
> -; THRESHOLD-NEXT:    [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
> -; THRESHOLD-NEXT:    [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
> -; THRESHOLD-NEXT:    [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
> -; THRESHOLD-NEXT:    [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
> -; THRESHOLD-NEXT:    [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
> -; THRESHOLD-NEXT:    [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
> -; THRESHOLD-NEXT:    [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
> -; THRESHOLD-NEXT:    [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
> -; THRESHOLD-NEXT:    [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
> -; THRESHOLD-NEXT:    [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
> -; THRESHOLD-NEXT:    [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float>
> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19,
> i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32
> 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -634,7 +499,6 @@ define float @f(float* nocapture readonl
>  ; THRESHOLD-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float>
> [[BIN_RDX14]], [[RDX_SHUF15]]
>  ; THRESHOLD-NEXT:    [[TMP5:%.*]] = extractelement <16 x float>
> [[BIN_RDX16]], i32 0
>  ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
> -; THRESHOLD-NEXT:    [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
>  ; THRESHOLD-NEXT:    ret float [[OP_RDX]]
>  ;
>    entry:
> @@ -821,37 +685,6 @@ define float @f1(float* nocapture readon
>  ; CHECK-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 31
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>*
> [[TMP0]], align 4
> -; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
> -; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
> -; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
> -; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
> -; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
> -; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
> -; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
> -; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
> -; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
> -; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
> -; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
> -; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
> -; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
> -; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
> -; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
> -; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
> -; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
> -; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
> -; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
> -; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
> -; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
> -; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
> -; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
> -; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
> -; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
> -; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
> -; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
> -; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
> -; CHECK-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]],
> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -864,7 +697,6 @@ define float @f1(float* nocapture readon
>  ; CHECK-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]],
> [[RDX_SHUF7]]
>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
> -; CHECK-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>  ; CHECK-NEXT:    ret float [[OP_EXTRA]]
>  ;
>  ; THRESHOLD-LABEL: @f1(
> @@ -904,37 +736,6 @@ define float @f1(float* nocapture readon
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 31
>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>*
> [[TMP0]], align 4
> -; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
> -; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
> -; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
> -; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
> -; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
> -; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
> -; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
> -; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
> -; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
> -; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
> -; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
> -; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
> -; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
> -; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
> -; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
> -; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
> -; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
> -; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
> -; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
> -; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
> -; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
> -; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
> -; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
> -; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
> -; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
> -; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
> -; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
> -; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
> -; THRESHOLD-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float>
> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19,
> i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32
> 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -947,7 +748,6 @@ define float @f1(float* nocapture readon
>  ; THRESHOLD-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float>
> [[BIN_RDX6]], [[RDX_SHUF7]]
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <32 x float>
> [[BIN_RDX8]], i32 0
>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA]]
>  ;
>    entry:
> @@ -1058,17 +858,12 @@ define float @loadadd31(float* nocapture
>  ; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
>  ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 2
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
> -; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
>  ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 3
>  ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 4
>  ; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 5
>  ; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 6
>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x
> float>*
>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]],
> align 4
> -; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
> -; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
> -; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>  ; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 7
>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 8
>  ; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 9
> @@ -1079,14 +874,6 @@ define float @loadadd31(float* nocapture
>  ; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 14
>  ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x
> float>*
>  ; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
> align 4
> -; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
> -; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
> -; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
> -; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
> -; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
> -; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
> -; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
> -; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>  ; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 15
>  ; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 16
>  ; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 17
> @@ -1105,21 +892,6 @@ define float @loadadd31(float* nocapture
>  ; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 30
>  ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x
> float>*
>  ; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>*
> [[TMP6]], align 4
> -; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
> -; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
> -; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
> -; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
> -; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
> -; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
> -; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
> -; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
> -; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
> -; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
> -; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
> -; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
> -; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
> -; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
> -; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]],
> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1145,7 +917,6 @@ define float @loadadd31(float* nocapture
>  ; CHECK-NEXT:    [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
>  ; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
>  ; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
> -; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>  ; CHECK-NEXT:    ret float [[TMP12]]
>  ;
>  ; THRESHOLD-LABEL: @loadadd31(
> @@ -1154,17 +925,12 @@ define float @loadadd31(float* nocapture
>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]],
> align 4
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 2
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]],
> align 4
> -; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 3
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 4
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 5
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 6
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x
> float>*
>  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>*
> [[TMP2]], align 4
> -; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
> -; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
> -; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 7
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 8
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 9
> @@ -1175,14 +941,6 @@ define float @loadadd31(float* nocapture
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 14
>  ; THRESHOLD-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x
> float>*
>  ; THRESHOLD-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>*
> [[TMP4]], align 4
> -; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
> -; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
> -; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
> -; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
> -; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
> -; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
> -; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
> -; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 15
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 16
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 17
> @@ -1201,21 +959,6 @@ define float @loadadd31(float* nocapture
>  ; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 30
>  ; THRESHOLD-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16
> x float>*
>  ; THRESHOLD-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>*
> [[TMP6]], align 4
> -; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
> -; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
> -; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
> -; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
> -; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
> -; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
> -; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
> -; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
> -; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
> -; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
> -; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
> -; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
> -; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
> -; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
> -; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float>
> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32
> 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1241,7 +984,6 @@ define float @loadadd31(float* nocapture
>  ; THRESHOLD-NEXT:    [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]],
> [[TMP10]]
>  ; THRESHOLD-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX17]],
> [[TMP1]]
>  ; THRESHOLD-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
> -; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>  ; THRESHOLD-NEXT:    ret float [[TMP12]]
>  ;
>    entry:
> @@ -1352,14 +1094,6 @@ define float @extra_args(float* nocaptur
>  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 7
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
> align 4
> -; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
> -; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
> -; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
> -; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
> -; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
> -; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
> -; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1369,7 +1103,6 @@ define float @extra_args(float* nocaptur
>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
> [[CONV]]
> -; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
>  ;
>  ; THRESHOLD-LABEL: @extra_args(
> @@ -1386,14 +1119,6 @@ define float @extra_args(float* nocaptur
>  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 7
>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>*
> [[TMP0]], align 4
> -; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
> -; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
> -; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
> -; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
> -; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
> -; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
> -; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float>
> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1403,7 +1128,6 @@ define float @extra_args(float* nocaptur
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float>
> [[BIN_RDX4]], i32 0
>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
> [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
>  ;
>    entry:
> @@ -1452,16 +1176,6 @@ define float @extra_args_same_several_ti
>  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 7
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
> align 4
> -; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
> -; CHECK-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
> -; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
> -; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
> -; CHECK-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]],
> 5.000000e+00
> -; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
> -; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
> -; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
> -; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1473,7 +1187,6 @@ define float @extra_args_same_several_ti
>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
> 5.000000e+00
>  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]],
> 5.000000e+00
>  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]],
> [[CONV]]
> -; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>  ; CHECK-NEXT:    ret float [[OP_EXTRA7]]
>  ;
>  ; THRESHOLD-LABEL: @extra_args_same_several_times(
> @@ -1490,16 +1203,6 @@ define float @extra_args_same_several_ti
>  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 7
>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>*
> [[TMP0]], align 4
> -; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
> -; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
> -; THRESHOLD-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]],
> 5.000000e+00
> -; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
> -; THRESHOLD-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]],
> 5.000000e+00
> -; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
> -; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
> -; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
> -; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float>
> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1511,7 +1214,6 @@ define float @extra_args_same_several_ti
>  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
> 5.000000e+00
>  ; THRESHOLD-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]],
> 5.000000e+00
>  ; THRESHOLD-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]],
> [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA7]]
>  ;
>    entry:
> @@ -1564,14 +1266,6 @@ define float @extra_args_no_replace(floa
>  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 7
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
> align 4
> -; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
> -; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
> -; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
> -; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
> -; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
> -; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
> -; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1581,7 +1275,6 @@ define float @extra_args_no_replace(floa
>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
> [[CONV]]
> -; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
>  ;
>  ; THRESHOLD-LABEL: @extra_args_no_replace(
> @@ -1600,14 +1293,6 @@ define float @extra_args_no_replace(floa
>  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
> float* [[X]], i64 7
>  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>*
> [[TMP0]], align 4
> -; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
> -; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
> -; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
> -; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
> -; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
> -; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
> -; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float>
> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1617,7 +1302,6 @@ define float @extra_args_no_replace(floa
>  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float>
> [[BIN_RDX4]], i32 0
>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
> [[CONV]]
> -; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
>  ;
>    entry:
> @@ -1668,10 +1352,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>  ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
>  ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]],
> zeroinitializer
>  ; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
> -; CHECK-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
> -; CHECK-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
> -; CHECK-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
> -; CHECK-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4
> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]],
> <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> @@ -1679,7 +1359,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>  ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
>  ; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
> -; CHECK-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
>  ; CHECK-NEXT:    ret i32 [[OP_EXTRA3]]
>  ;
>  ; THRESHOLD-LABEL: @wobble(
> @@ -1696,10 +1375,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>  ; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]],
> i32 3
>  ; THRESHOLD-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]],
> zeroinitializer
>  ; THRESHOLD-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
> -; THRESHOLD-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
> -; THRESHOLD-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
> -; THRESHOLD-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
> -; THRESHOLD-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
>  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32>
> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]],
> [[RDX_SHUF]]
>  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
> @@ -1707,7 +1382,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>  ; THRESHOLD-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32>
> [[BIN_RDX2]], i32 0
>  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
>  ; THRESHOLD-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]],
> [[TMP9]]
> -; THRESHOLD-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
>  ; THRESHOLD-NEXT:    ret i32 [[OP_EXTRA3]]
>  ;
>    bb:
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll Mon Sep 23 09:25:03 2019
> @@ -12,19 +12,6 @@
>  define i32 @maxi8(i32) {
>  ; CHECK-LABEL: @maxi8(
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x
> i32]* @arr to <8 x i32>*), align 16
> -; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
> undef
> -; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
> undef
> -; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
> undef
> -; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]]
> @@ -34,9 +21,8 @@ define i32 @maxi8(i32) {
>  ; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x i32>
> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32>
> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
> [[RDX_SHUF4]]
> -; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32>
> [[RDX_MINMAX_SELECT6]], i32 0
> -; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
> undef
> -; CHECK-NEXT:    ret i32 [[TMP16]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32>
> [[RDX_MINMAX_SELECT6]], i32 0
> +; CHECK-NEXT:    ret i32 [[TMP3]]
>  ;
>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 0), align 16
>    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 1), align 4
> @@ -66,35 +52,6 @@ define i32 @maxi8(i32) {
>  define i32 @maxi16(i32) {
>  ; CHECK-LABEL: @maxi16(
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32
> x i32]* @arr to <16 x i32>*), align 16
> -; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
> undef
> -; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
> undef
> -; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
> undef
> -; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
> -; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
> undef
> -; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
> undef
> -; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
> -; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
> undef
> -; CHECK-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
> -; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
> undef
> -; CHECK-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
> -; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
> undef
> -; CHECK-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
> -; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
> undef
> -; CHECK-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
> -; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
> undef
> -; CHECK-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
> -; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
> undef
> -; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]],
> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1>
> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]]
> @@ -107,9 +64,8 @@ define i32 @maxi16(i32) {
>  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x i32>
> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32>
> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1>
> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32>
> [[RDX_SHUF7]]
> -; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x i32>
> [[RDX_MINMAX_SELECT9]], i32 0
> -; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
> undef
> -; CHECK-NEXT:    ret i32 [[TMP32]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i32>
> [[RDX_MINMAX_SELECT9]], i32 0
> +; CHECK-NEXT:    ret i32 [[TMP3]]
>  ;
>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 0), align 16
>    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 1), align 4
> @@ -163,67 +119,6 @@ define i32 @maxi16(i32) {
>  define i32 @maxi32(i32) {
>  ; CHECK-LABEL: @maxi32(
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32
> x i32]* @arr to <32 x i32>*), align 16
> -; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
> undef
> -; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
> undef
> -; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
> undef
> -; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
> -; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
> undef
> -; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
> undef
> -; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
> -; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
> undef
> -; CHECK-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
> -; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
> undef
> -; CHECK-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
> -; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
> undef
> -; CHECK-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
> -; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
> undef
> -; CHECK-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
> -; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
> undef
> -; CHECK-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
> -; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
> undef
> -; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
> -; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
> undef
> -; CHECK-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef
> -; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32
> undef
> -; CHECK-NEXT:    [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef
> -; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32
> undef
> -; CHECK-NEXT:    [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef
> -; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32
> undef
> -; CHECK-NEXT:    [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef
> -; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32
> undef
> -; CHECK-NEXT:    [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef
> -; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32
> undef
> -; CHECK-NEXT:    [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef
> -; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32
> undef
> -; CHECK-NEXT:    [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef
> -; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32
> undef
> -; CHECK-NEXT:    [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef
> -; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32
> undef
> -; CHECK-NEXT:    [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef
> -; CHECK-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32
> undef
> -; CHECK-NEXT:    [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef
> -; CHECK-NEXT:    [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32
> undef
> -; CHECK-NEXT:    [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef
> -; CHECK-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32
> undef
> -; CHECK-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef
> -; CHECK-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32
> undef
> -; CHECK-NEXT:    [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef
> -; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32
> undef
> -; CHECK-NEXT:    [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef
> -; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32
> undef
> -; CHECK-NEXT:    [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef
> -; CHECK-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32
> undef
> -; CHECK-NEXT:    [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]],
> <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1>
> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]]
> @@ -239,9 +134,8 @@ define i32 @maxi32(i32) {
>  ; CHECK-NEXT:    [[RDX_SHUF10:%.*]] = shufflevector <32 x i32>
> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32>
> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1>
> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32>
> [[RDX_SHUF10]]
> -; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <32 x i32>
> [[RDX_MINMAX_SELECT12]], i32 0
> -; CHECK-NEXT:    [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32
> undef
> -; CHECK-NEXT:    ret i32 [[TMP64]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <32 x i32>
> [[RDX_MINMAX_SELECT12]], i32 0
> +; CHECK-NEXT:    ret i32 [[TMP3]]
>  ;
>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 0), align 16
>    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 1), align 4
> @@ -343,19 +237,6 @@ define i32 @maxi32(i32) {
>  define float @maxf8(float) {
>  ; CHECK-LABEL: @maxf8(
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast
> ([32 x float]* @arr1 to <8 x float>*), align 16
> -; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
> undef
> -; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float
> undef
> -; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float
> undef
> -; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
> float undef
> -; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
> float undef
> -; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
> float undef
> -; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]],
> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float>
> [[TMP2]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
> @@ -365,9 +246,8 @@ define float @maxf8(float) {
>  ; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x float>
> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float>
> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float>
> [[RDX_SHUF4]]
> -; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x float>
> [[RDX_MINMAX_SELECT6]], i32 0
> -; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]],
> float undef
> -; CHECK-NEXT:    ret float [[TMP16]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float>
> [[RDX_MINMAX_SELECT6]], i32 0
> +; CHECK-NEXT:    ret float [[TMP3]]
>  ;
>    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
> float]* @arr1, i64 0, i64 0), align 16
>    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
> float]* @arr1, i64 0, i64 1), align 4
> @@ -397,35 +277,6 @@ define float @maxf8(float) {
>  define float @maxf16(float) {
>  ; CHECK-LABEL: @maxf16(
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast
> ([32 x float]* @arr1 to <16 x float>*), align 16
> -; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
> undef
> -; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float
> undef
> -; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float
> undef
> -; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
> float undef
> -; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
> float undef
> -; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
> float undef
> -; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
> -; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]],
> float undef
> -; CHECK-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]],
> float undef
> -; CHECK-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
> -; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]],
> float undef
> -; CHECK-NEXT:    [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
> -; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]],
> float undef
> -; CHECK-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
> -; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]],
> float undef
> -; CHECK-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
> -; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]],
> float undef
> -; CHECK-NEXT:    [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
> -; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]],
> float undef
> -; CHECK-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
> -; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]],
> float undef
> -; CHECK-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]],
> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float>
> [[TMP2]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1>
> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
> @@ -438,9 +289,8 @@ define float @maxf16(float) {
>  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x float>
> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> <i32 1, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float>
> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1>
> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float>
> [[RDX_SHUF7]]
> -; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x float>
> [[RDX_MINMAX_SELECT9]], i32 0
> -; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]],
> float undef
> -; CHECK-NEXT:    ret float [[TMP32]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float>
> [[RDX_MINMAX_SELECT9]], i32 0
> +; CHECK-NEXT:    ret float [[TMP3]]
>  ;
>    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
> float]* @arr1, i64 0, i64 0), align 16
>    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
> float]* @arr1, i64 0, i64 1), align 4
> @@ -494,67 +344,6 @@ define float @maxf16(float) {
>  define float @maxf32(float) {
>  ; CHECK-LABEL: @maxf32(
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast
> ([32 x float]* @arr1 to <32 x float>*), align 16
> -; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
> -; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
> undef
> -; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
> -; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float
> undef
> -; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
> -; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float
> undef
> -; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
> -; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
> float undef
> -; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
> float undef
> -; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
> -; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
> float undef
> -; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
> -; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]],
> float undef
> -; CHECK-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]],
> float undef
> -; CHECK-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
> -; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]],
> float undef
> -; CHECK-NEXT:    [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
> -; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]],
> float undef
> -; CHECK-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
> -; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]],
> float undef
> -; CHECK-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
> -; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]],
> float undef
> -; CHECK-NEXT:    [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
> -; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]],
> float undef
> -; CHECK-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
> -; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]],
> float undef
> -; CHECK-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
> -; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]],
> float undef
> -; CHECK-NEXT:    [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef
> -; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]],
> float undef
> -; CHECK-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef
> -; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]],
> float undef
> -; CHECK-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef
> -; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]],
> float undef
> -; CHECK-NEXT:    [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef
> -; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]],
> float undef
> -; CHECK-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef
> -; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]],
> float undef
> -; CHECK-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef
> -; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]],
> float undef
> -; CHECK-NEXT:    [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef
> -; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]],
> float undef
> -; CHECK-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef
> -; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]],
> float undef
> -; CHECK-NEXT:    [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef
> -; CHECK-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]],
> float undef
> -; CHECK-NEXT:    [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef
> -; CHECK-NEXT:    [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]],
> float undef
> -; CHECK-NEXT:    [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef
> -; CHECK-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]],
> float undef
> -; CHECK-NEXT:    [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef
> -; CHECK-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]],
> float undef
> -; CHECK-NEXT:    [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef
> -; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]],
> float undef
> -; CHECK-NEXT:    [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef
> -; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]],
> float undef
> -; CHECK-NEXT:    [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef
> -; CHECK-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]],
> float undef
> -; CHECK-NEXT:    [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]],
> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float>
> [[TMP2]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1>
> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]]
> @@ -570,9 +359,8 @@ define float @maxf32(float) {
>  ; CHECK-NEXT:    [[RDX_SHUF10:%.*]] = shufflevector <32 x float>
> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> <i32 1, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float>
> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1>
> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float>
> [[RDX_SHUF10]]
> -; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <32 x float>
> [[RDX_MINMAX_SELECT12]], i32 0
> -; CHECK-NEXT:    [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]],
> float undef
> -; CHECK-NEXT:    ret float [[TMP64]]
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <32 x float>
> [[RDX_MINMAX_SELECT12]], i32 0
> +; CHECK-NEXT:    ret float [[TMP3]]
>  ;
>    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
> float]* @arr1, i64 0, i64 0), align 16
>    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
> float]* @arr1, i64 0, i64 1), align 4
> @@ -678,34 +466,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>  ; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>  ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
> [[TMP3]]
>  ; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
> i32>*), align 8
> -; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
> -; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
> -; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
> -; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; SSE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> -; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
> +; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>  ; SSE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; SSE-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
> [[RDX_SHUF]]
>  ; SSE-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>  ; SSE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
> undef, i32 undef>
>  ; SSE-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>  ; SSE-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
> [[RDX_SHUF1]]
> -; SSE-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> -; SSE-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
> -; SSE-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
> [[TMP15]]
> -; SSE-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
> -; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32
> [[TMP5]]
> -; SSE-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
> [[TMP15]]
> -; SSE-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> -; SSE-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
> -; SSE-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32
> [[TMP22]]
> -; SSE-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
> -; SSE-NEXT:    store i32 [[TMP25]], i32* @var, align 8
> -; SSE-NEXT:    ret i32 [[TMP24]]
> +; SSE-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> +; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
> +; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
> [[TMP7]]
> +; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
> +; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> [[TMP5]]
> +; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> +; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
> +; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32
> [[TMP12]]
> +; SSE-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
> +; SSE-NEXT:    store i32 [[TMP15]], i32* @var, align 8
> +; SSE-NEXT:    ret i32 [[TMP14]]
>  ;
>  ; AVX-LABEL: @maxi8_mutiple_uses(
>  ; AVX-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
> @@ -713,34 +491,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>  ; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>  ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
> [[TMP3]]
>  ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
> i32>*), align 8
> -; AVX-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
> -; AVX-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
> -; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
> -; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; AVX-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; AVX-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> -; AVX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
> +; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>  ; AVX-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; AVX-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
> [[RDX_SHUF]]
>  ; AVX-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>  ; AVX-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
> undef, i32 undef>
>  ; AVX-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>  ; AVX-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
> [[RDX_SHUF1]]
> -; AVX-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> -; AVX-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
> -; AVX-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
> [[TMP15]]
> -; AVX-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
> -; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32
> [[TMP5]]
> -; AVX-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
> [[TMP15]]
> -; AVX-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> -; AVX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
> -; AVX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32
> [[TMP22]]
> -; AVX-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
> -; AVX-NEXT:    store i32 [[TMP25]], i32* @var, align 8
> -; AVX-NEXT:    ret i32 [[TMP24]]
> +; AVX-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> +; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
> +; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
> [[TMP7]]
> +; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
> +; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> [[TMP5]]
> +; AVX-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> +; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
> +; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32
> [[TMP12]]
> +; AVX-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
> +; AVX-NEXT:    store i32 [[TMP15]], i32* @var, align 8
> +; AVX-NEXT:    ret i32 [[TMP14]]
>  ;
>  ; AVX2-LABEL: @maxi8_mutiple_uses(
>  ; AVX2-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
> @@ -748,34 +516,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>  ; AVX2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>  ; AVX2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
> [[TMP3]]
>  ; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
> i32>*), align 8
> -; AVX2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
> -; AVX2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
> -; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
> undef
> -; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; AVX2-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; AVX2-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> -; AVX2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
> +; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>  ; AVX2-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; AVX2-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
> [[RDX_SHUF]]
>  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>  ; AVX2-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
> undef, i32 undef>
>  ; AVX2-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
> [[RDX_SHUF1]]
> -; AVX2-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> -; AVX2-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
> -; AVX2-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
> [[TMP15]]
> -; AVX2-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
> -; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]],
> i32 [[TMP5]]
> -; AVX2-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
> [[TMP15]]
> -; AVX2-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds
> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> -; AVX2-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
> -; AVX2-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]],
> i32 [[TMP22]]
> -; AVX2-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
> -; AVX2-NEXT:    store i32 [[TMP25]], i32* @var, align 8
> -; AVX2-NEXT:    ret i32 [[TMP24]]
> +; AVX2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> +; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
> +; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
> [[TMP7]]
> +; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
> +; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]],
> i32 [[TMP5]]
> +; AVX2-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds
> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> +; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
> +; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]],
> i32 [[TMP12]]
> +; AVX2-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
> +; AVX2-NEXT:    store i32 [[TMP15]], i32* @var, align 8
> +; AVX2-NEXT:    ret i32 [[TMP14]]
>  ;
>  ; SKX-LABEL: @maxi8_mutiple_uses(
>  ; SKX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x
> i32]* @arr to <2 x i32>*), align 16
> @@ -797,26 +555,16 @@ define i32 @maxi8_mutiple_uses(i32) {
>  ; SKX-NEXT:    [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]]
>  ; SKX-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32>
> [[TMP9]], <2 x i32> [[TMP11]]
>  ; SKX-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
> -; SKX-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
> -; SKX-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
> undef
> -; SKX-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
> -; SKX-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
> undef
> -; SKX-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
> -; SKX-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
> undef
> -; SKX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
> -; SKX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
> undef
> -; SKX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP6]]
> -; SKX-NEXT:    [[TMP24:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
> -; SKX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP14]]
> -; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
> [[TMP14]]
> -; SKX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
> [[TMP6]]
> -; SKX-NEXT:    [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> -; SKX-NEXT:    [[TMP28:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP27]]
> -; SKX-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[OP_EXTRA]], i32
> [[TMP27]]
> -; SKX-NEXT:    [[TMP30:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
> -; SKX-NEXT:    [[TMP31:%.*]] = select i1 [[TMP30]], i32 3, i32 4
> -; SKX-NEXT:    store i32 [[TMP31]], i32* @var, align 8
> -; SKX-NEXT:    ret i32 [[TMP29]]
> +; SKX-NEXT:    [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
> +; SKX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
> +; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32
> [[TMP14]]
> +; SKX-NEXT:    [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> +; SKX-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]]
> +; SKX-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]], i32
> [[TMP17]]
> +; SKX-NEXT:    [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
> +; SKX-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4
> +; SKX-NEXT:    store i32 [[TMP21]], i32* @var, align 8
> +; SKX-NEXT:    ret i32 [[TMP19]]
>  ;
>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 0), align 16
>    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 1), align 4
> @@ -854,33 +602,21 @@ define i32 @maxi8_wrong_parent(i32) {
>  ; SSE:       pp:
>  ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
> [[TMP3]]
>  ; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
> i32>*), align 8
> -; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
> -; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
> -; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
> -; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; SSE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> -; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
> -; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
> [[TMP15]]
> -; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> -; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
> +; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> +; SSE-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>  ; SSE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; SSE-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
> [[RDX_SHUF]]
>  ; SSE-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>  ; SSE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
> undef, i32 undef>
>  ; SSE-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>  ; SSE-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
> [[RDX_SHUF1]]
> -; SSE-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> -; SSE-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
> -; SSE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
> [[TMP15]]
> -; SSE-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
> -; SSE-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
> [[TMP18]]
> -; SSE-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
> -; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
> [[TMP5]]
> -; SSE-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
> [[TMP18]]
> +; SSE-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> +; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
> +; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
> [[TMP7]]
> +; SSE-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
> +; SSE-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
> [[TMP8]]
> +; SSE-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
> +; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32
> [[TMP5]]
>  ; SSE-NEXT:    ret i32 [[OP_EXTRA]]
>  ;
>  ; AVX-LABEL: @maxi8_wrong_parent(
> @@ -891,33 +627,21 @@ define i32 @maxi8_wrong_parent(i32) {
>  ; AVX:       pp:
>  ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
> [[TMP3]]
>  ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
> i32>*), align 8
> -; AVX-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
> -; AVX-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
> -; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
> -; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; AVX-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; AVX-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> -; AVX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
> -; AVX-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
> [[TMP15]]
> -; AVX-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> -; AVX-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
> +; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> +; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>  ; AVX-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; AVX-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
> [[RDX_SHUF]]
>  ; AVX-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>  ; AVX-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
> undef, i32 undef>
>  ; AVX-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>  ; AVX-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
> [[RDX_SHUF1]]
> -; AVX-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> -; AVX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
> -; AVX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
> [[TMP15]]
> -; AVX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
> -; AVX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
> [[TMP18]]
> -; AVX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
> -; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
> [[TMP5]]
> -; AVX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
> [[TMP18]]
> +; AVX-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> +; AVX-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
> +; AVX-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
> [[TMP7]]
> +; AVX-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
> +; AVX-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
> [[TMP8]]
> +; AVX-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
> +; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32
> [[TMP5]]
>  ; AVX-NEXT:    ret i32 [[OP_EXTRA]]
>  ;
>  ; AVX2-LABEL: @maxi8_wrong_parent(
> @@ -928,33 +652,21 @@ define i32 @maxi8_wrong_parent(i32) {
>  ; AVX2:       pp:
>  ; AVX2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
> [[TMP3]]
>  ; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
> i32>*), align 8
> -; AVX2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
> -; AVX2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
> -; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
> -; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
> undef
> -; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
> -; AVX2-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
> undef
> -; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
> undef
> -; AVX2-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> -; AVX2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
> -; AVX2-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
> [[TMP15]]
> -; AVX2-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds
> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
> -; AVX2-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
> +; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
> +; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32
> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>  ; AVX2-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; AVX2-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
> [[RDX_SHUF]]
>  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>  ; AVX2-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
> undef, i32 undef>
>  ; AVX2-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
> [[RDX_SHUF1]]
> -; AVX2-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> -; AVX2-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
> -; AVX2-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
> [[TMP15]]
> -; AVX2-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
> -; AVX2-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
> [[TMP18]]
> -; AVX2-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
> -; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
> i32 [[TMP5]]
> -; AVX2-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
> [[TMP18]]
> +; AVX2-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT3]], i32 0
> +; AVX2-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
> +; AVX2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
> [[TMP7]]
> +; AVX2-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
> +; AVX2-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
> [[TMP8]]
> +; AVX2-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
> +; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]],
> i32 [[TMP5]]
>  ; AVX2-NEXT:    ret i32 [[OP_EXTRA]]
>  ;
>  ; SKX-LABEL: @maxi8_wrong_parent(
> @@ -985,21 +697,9 @@ define i32 @maxi8_wrong_parent(i32) {
>  ; SKX-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32
> [[TMP4]], i32 1
>  ; SKX-NEXT:    [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32>
> [[TMP16]], <2 x i32> [[TMP18]]
>  ; SKX-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
> -; SKX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
> -; SKX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
> undef
> -; SKX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
> -; SKX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
> undef
> -; SKX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
> -; SKX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
> undef
> -; SKX-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
> -; SKX-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
> undef
> -; SKX-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP7]]
> -; SKX-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
> [[TMP7]]
> -; SKX-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], [[TMP8]]
> -; SKX-NEXT:    [[TMP32:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
> -; SKX-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP20]]
> -; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32
> [[TMP20]]
> -; SKX-NEXT:    [[TMP34:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
> [[TMP8]]
> +; SKX-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
> +; SKX-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
> +; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32
> [[TMP20]]
>  ; SKX-NEXT:    ret i32 [[OP_EXTRA]]
>  ;
>    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
> @arr, i64 0, i64 0), align 16
>
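For readers skimming the quoted horizontal-minmax.ll hunks above: the dropped CHECK lines are essentially the leftover scalar icmp sgt / select ladders that operated on undef (plus their downstream users), and the updated expectations keep only the shufflevector-based min/max reduction and the final extractelement. A minimal sketch of the kept shape, assuming a 4-wide signed-max reduction of some value %v — the value names here are illustrative, not taken from any particular test:

  %rdx.shuf = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %rdx.minmax.cmp = icmp sgt <4 x i32> %v, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i32> %v, <4 x i32> %rdx.shuf
  %rdx.shuf1 = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp2 = icmp sgt <4 x i32> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf1
  %max = extractelement <4 x i32> %rdx.minmax.select3, i32 0
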
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll Mon Sep 23
> 09:25:03 2019
> @@ -37,14 +37,11 @@ define i32 @add_red(float* %A, i32 %n) {
>  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x
> float>*
>  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]],
> align 4
>  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float
> 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
> -; CHECK-NEXT:    [[ADD6:%.*]] = fadd fast float undef, undef
> -; CHECK-NEXT:    [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> -; CHECK-NEXT:    [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
>  ; CHECK-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
>  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
> @@ -77,14 +74,11 @@ define i32 @add_red(float* %A, i32 %n) {
>  ; STORE-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x
> float>*
>  ; STORE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]],
> align 4
>  ; STORE-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float
> 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
> -; STORE-NEXT:    [[ADD6:%.*]] = fadd fast float undef, undef
> -; STORE-NEXT:    [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; STORE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> -; STORE-NEXT:    [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
>  ; STORE-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
>  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
> @@ -178,14 +172,11 @@ define i32 @mul_red(float* noalias %A, f
>  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
> float>*
>  ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
> align 4
>  ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
> -; CHECK-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
> -; CHECK-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> -; CHECK-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>  ; CHECK-NEXT:    [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
>  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_040]], 1
>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
> @@ -223,14 +214,11 @@ define i32 @mul_red(float* noalias %A, f
>  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
> float>*
>  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
> align 4
>  ; STORE-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
> -; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
> -; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> -; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>  ; STORE-NEXT:    [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_040]], 1
>  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
> @@ -350,13 +338,6 @@ define i32 @long_red(float* noalias %A,
>  ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x
> float>*
>  ; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
> align 4
>  ; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
> -; CHECK-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
> -; CHECK-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
> -; CHECK-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
> -; CHECK-NEXT:    [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
> -; CHECK-NEXT:    [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
> -; CHECK-NEXT:    [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
> -; CHECK-NEXT:    [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
>  ; CHECK-NEXT:    [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
>  ; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds float,
> float* [[A]], i64 [[ADD47]]
>  ; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
> @@ -369,7 +350,6 @@ define i32 @long_red(float* noalias %A,
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
> i32 0
>  ; CHECK-NEXT:    [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
> -; CHECK-NEXT:    [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
>  ; CHECK-NEXT:    [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
>  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_083]], 1
>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
> @@ -421,13 +401,6 @@ define i32 @long_red(float* noalias %A,
>  ; STORE-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x
> float>*
>  ; STORE-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
> align 4
>  ; STORE-NEXT:    [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
> -; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
> -; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
> -; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
> -; STORE-NEXT:    [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
> -; STORE-NEXT:    [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
> -; STORE-NEXT:    [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
> -; STORE-NEXT:    [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
>  ; STORE-NEXT:    [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
>  ; STORE-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds float,
> float* [[A]], i64 [[ADD47]]
>  ; STORE-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
> @@ -440,7 +413,6 @@ define i32 @long_red(float* noalias %A,
>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; STORE-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
> i32 0
>  ; STORE-NEXT:    [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
> -; STORE-NEXT:    [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
>  ; STORE-NEXT:    [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_083]], 1
>  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
> @@ -576,16 +548,12 @@ define i32 @chain_red(float* noalias %A,
>  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
> float>*
>  ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
> align 4
>  ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
> -; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
> -; CHECK-NEXT:    [[ADD9:%.*]] = fadd fast float [[ADD]], undef
> -; CHECK-NEXT:    [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
> -; CHECK-NEXT:    [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
>  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_043]], 1
>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>  ; CHECK-NEXT:    br i1 [[EXITCOND]], label
> [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
> @@ -622,16 +590,12 @@ define i32 @chain_red(float* noalias %A,
>  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
> float>*
>  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
> align 4
>  ; STORE-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
> -; STORE-NEXT:    [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
> -; STORE-NEXT:    [[ADD9:%.*]] = fadd fast float [[ADD]], undef
> -; STORE-NEXT:    [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
>  ; STORE-NEXT:    [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
> -; STORE-NEXT:    [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_043]], 1
>  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>  ; STORE-NEXT:    br i1 [[EXITCOND]], label
> [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
> @@ -1087,14 +1051,11 @@ define i32 @store_red(float* noalias %A,
>  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
> float>*
>  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
> align 4
>  ; STORE-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]]
> -; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
> -; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> -; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>  ; STORE-NEXT:    store float [[TMP6]], float* [[C_ADDR_038]], align 4
>  ; STORE-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float*
> [[C_ADDR_038]], i64 1
>  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_039]], 1
> @@ -1169,14 +1130,11 @@ define void @float_red_example4(float* %
>  ; STORE-LABEL: @float_red_example4(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast
> ([32 x float]* @arr_float to <4 x float>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]],
> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
> undef>
>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
> i32 0
> -; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
>  ; STORE-NEXT:    ret void
>  ;
> @@ -1216,12 +1174,6 @@ define void @float_red_example8(float* %
>  ; STORE-LABEL: @float_red_example8(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast
> ([32 x float]* @arr_float to <8 x float>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
> -; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; STORE-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
> -; STORE-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
> -; STORE-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP0]],
> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1229,7 +1181,6 @@ define void @float_red_example8(float* %
>  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float>
> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
> i32 0
> -; STORE-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
>  ; STORE-NEXT:    ret void
>  ;
> @@ -1293,20 +1244,6 @@ define void @float_red_example16(float*
>  ; STORE-LABEL: @float_red_example16(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast
> ([32 x float]* @arr_float to <16 x float>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
> -; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
> -; STORE-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
> -; STORE-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
> -; STORE-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
> -; STORE-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
> -; STORE-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
> -; STORE-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
> -; STORE-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
> -; STORE-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
> -; STORE-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
> -; STORE-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
> -; STORE-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP0]],
> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1316,7 +1253,6 @@ define void @float_red_example16(float*
>  ; STORE-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x float>
> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]],
> [[RDX_SHUF5]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[BIN_RDX6]],
> i32 0
> -; STORE-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
>  ; STORE-NEXT:    ret void
>  ;
> @@ -1372,14 +1308,11 @@ define void @i32_red_example4(i32* %res)
>  ; STORE-LABEL: @i32_red_example4(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x
> i32]* @arr_i32 to <4 x i32>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP0]], <4
> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]],
> <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
> i32 0
> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>  ; STORE-NEXT:    ret void
>  ;
> @@ -1419,12 +1352,6 @@ define void @i32_red_example8(i32* %res)
>  ; STORE-LABEL: @i32_red_example8(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x
> i32]* @arr_i32 to <8 x i32>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -1432,7 +1359,6 @@ define void @i32_red_example8(i32* %res)
>  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>  ; STORE-NEXT:    ret void
>  ;
> @@ -1496,20 +1422,6 @@ define void @i32_red_example16(i32* %res
>  ; STORE-LABEL: @i32_red_example16(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32
> x i32]* @arr_i32 to <16 x i32>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
> -; STORE-NEXT:    [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
> -; STORE-NEXT:    [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
> -; STORE-NEXT:    [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
> -; STORE-NEXT:    [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
> -; STORE-NEXT:    [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
> -; STORE-NEXT:    [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
> -; STORE-NEXT:    [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP0]],
> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i32>
> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1519,7 +1431,6 @@ define void @i32_red_example16(i32* %res
>  ; STORE-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x i32>
> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]],
> [[RDX_SHUF5]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[BIN_RDX6]],
> i32 0
> -; STORE-NEXT:    [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
>  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>  ; STORE-NEXT:    ret void
>  ;
> @@ -1631,36 +1542,6 @@ define void @i32_red_example32(i32* %res
>  ; STORE-LABEL: @i32_red_example32(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32
> x i32]* @arr_i32 to <32 x i32>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
> -; STORE-NEXT:    [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
> -; STORE-NEXT:    [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
> -; STORE-NEXT:    [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
> -; STORE-NEXT:    [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
> -; STORE-NEXT:    [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
> -; STORE-NEXT:    [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
> -; STORE-NEXT:    [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
> -; STORE-NEXT:    [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
> -; STORE-NEXT:    [[ADD_15:%.*]] = add nsw i32 undef, [[ADD_14]]
> -; STORE-NEXT:    [[ADD_16:%.*]] = add nsw i32 undef, [[ADD_15]]
> -; STORE-NEXT:    [[ADD_17:%.*]] = add nsw i32 undef, [[ADD_16]]
> -; STORE-NEXT:    [[ADD_18:%.*]] = add nsw i32 undef, [[ADD_17]]
> -; STORE-NEXT:    [[ADD_19:%.*]] = add nsw i32 undef, [[ADD_18]]
> -; STORE-NEXT:    [[ADD_20:%.*]] = add nsw i32 undef, [[ADD_19]]
> -; STORE-NEXT:    [[ADD_21:%.*]] = add nsw i32 undef, [[ADD_20]]
> -; STORE-NEXT:    [[ADD_22:%.*]] = add nsw i32 undef, [[ADD_21]]
> -; STORE-NEXT:    [[ADD_23:%.*]] = add nsw i32 undef, [[ADD_22]]
> -; STORE-NEXT:    [[ADD_24:%.*]] = add nsw i32 undef, [[ADD_23]]
> -; STORE-NEXT:    [[ADD_25:%.*]] = add nsw i32 undef, [[ADD_24]]
> -; STORE-NEXT:    [[ADD_26:%.*]] = add nsw i32 undef, [[ADD_25]]
> -; STORE-NEXT:    [[ADD_27:%.*]] = add nsw i32 undef, [[ADD_26]]
> -; STORE-NEXT:    [[ADD_28:%.*]] = add nsw i32 undef, [[ADD_27]]
> -; STORE-NEXT:    [[ADD_29:%.*]] = add nsw i32 undef, [[ADD_28]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP0]],
> <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <32 x i32> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x i32>
> [[BIN_RDX]], <32 x i32> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -1672,7 +1553,6 @@ define void @i32_red_example32(i32* %res
>  ; STORE-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x i32>
> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX8:%.*]] = add nsw <32 x i32> [[BIN_RDX6]],
> [[RDX_SHUF7]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <32 x i32> [[BIN_RDX8]],
> i32 0
> -; STORE-NEXT:    [[ADD_30:%.*]] = add nsw i32 undef, [[ADD_29]]
>  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>  ; STORE-NEXT:    ret void
>  ;
> @@ -1750,12 +1630,6 @@ define void @i32_red_call(i32 %val) {
>  ; CHECK-LABEL: @i32_red_call(
>  ; CHECK-NEXT:  entry:
>  ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x
> i32]* @arr_i32 to <8 x i32>*), align 16
> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
> -; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
> -; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
> -; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -1763,19 +1637,12 @@ define void @i32_red_call(i32 %val) {
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>  ; CHECK-NEXT:    [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
>  ; CHECK-NEXT:    ret void
>  ;
>  ; STORE-LABEL: @i32_red_call(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x
> i32]* @arr_i32 to <8 x i32>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -1783,7 +1650,6 @@ define void @i32_red_call(i32 %val) {
>  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>  ; STORE-NEXT:    [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
>  ; STORE-NEXT:    ret void
>  ;
> @@ -1811,12 +1677,6 @@ define void @i32_red_invoke(i32 %val) pe
>  ; CHECK-LABEL: @i32_red_invoke(
>  ; CHECK-NEXT:  entry:
>  ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x
> i32]* @arr_i32 to <8 x i32>*), align 16
> -; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
> -; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
> -; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
> -; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -1824,7 +1684,6 @@ define void @i32_red_invoke(i32 %val) pe
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>  ; CHECK-NEXT:    [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
>  ; CHECK-NEXT:    to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
>  ; CHECK:       exception:
> @@ -1837,12 +1696,6 @@ define void @i32_red_invoke(i32 %val) pe
>  ; STORE-LABEL: @i32_red_invoke(
>  ; STORE-NEXT:  entry:
>  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x
> i32]* @arr_i32 to <8 x i32>*), align 16
> -; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
> -; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
> -; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
> -; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
> -; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
> -; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
> [[RDX_SHUF]]
>  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -1850,7 +1703,6 @@ define void @i32_red_invoke(i32 %val) pe
>  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>  ; STORE-NEXT:    [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
>  ; STORE-NEXT:    to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
>  ; STORE:       exception:
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll Mon Sep 23
> 09:25:03 2019
> @@ -12,10 +12,10 @@ define i32 @test(double* nocapture %A, i
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1
>  ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 3, i8 3>
> -; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
> -; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i8> undef, i8
> [[TMP3]], i32 0
> -; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
> -; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8
> [[TMP5]], i32 1
> +; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
> +; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
> +; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i8> undef, i8
> [[TMP4]], i32 0
> +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i8> [[TMP5]], i8
> [[TMP3]], i32 1
>  ; CHECK-NEXT:    [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double>
>  ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]
>  ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], <double
> 1.000000e+00, double 1.000000e+00>
>
> Modified:
> llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll Mon
> Sep 23 09:25:03 2019
> @@ -5,36 +5,6 @@ define signext i8 @Foo(<32 x i8>* %__v)
>  ; CHECK-LABEL: @Foo(
>  ; CHECK-NEXT:  entry:
>  ; CHECK-NEXT:    [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]],
> align 32
> -; CHECK-NEXT:    [[ADD_I_1_I:%.*]] = add i8 undef, undef
> -; CHECK-NEXT:    [[ADD_I_2_I:%.*]] = add i8 [[ADD_I_1_I]], undef
> -; CHECK-NEXT:    [[ADD_I_3_I:%.*]] = add i8 [[ADD_I_2_I]], undef
> -; CHECK-NEXT:    [[ADD_I_4_I:%.*]] = add i8 [[ADD_I_3_I]], undef
> -; CHECK-NEXT:    [[ADD_I_5_I:%.*]] = add i8 [[ADD_I_4_I]], undef
> -; CHECK-NEXT:    [[ADD_I_6_I:%.*]] = add i8 [[ADD_I_5_I]], undef
> -; CHECK-NEXT:    [[ADD_I_7_I:%.*]] = add i8 [[ADD_I_6_I]], undef
> -; CHECK-NEXT:    [[ADD_I_8_I:%.*]] = add i8 [[ADD_I_7_I]], undef
> -; CHECK-NEXT:    [[ADD_I_9_I:%.*]] = add i8 [[ADD_I_8_I]], undef
> -; CHECK-NEXT:    [[ADD_I_10_I:%.*]] = add i8 [[ADD_I_9_I]], undef
> -; CHECK-NEXT:    [[ADD_I_11_I:%.*]] = add i8 [[ADD_I_10_I]], undef
> -; CHECK-NEXT:    [[ADD_I_12_I:%.*]] = add i8 [[ADD_I_11_I]], undef
> -; CHECK-NEXT:    [[ADD_I_13_I:%.*]] = add i8 [[ADD_I_12_I]], undef
> -; CHECK-NEXT:    [[ADD_I_14_I:%.*]] = add i8 [[ADD_I_13_I]], undef
> -; CHECK-NEXT:    [[ADD_I_15_I:%.*]] = add i8 [[ADD_I_14_I]], undef
> -; CHECK-NEXT:    [[ADD_I_16_I:%.*]] = add i8 [[ADD_I_15_I]], undef
> -; CHECK-NEXT:    [[ADD_I_17_I:%.*]] = add i8 [[ADD_I_16_I]], undef
> -; CHECK-NEXT:    [[ADD_I_18_I:%.*]] = add i8 [[ADD_I_17_I]], undef
> -; CHECK-NEXT:    [[ADD_I_19_I:%.*]] = add i8 [[ADD_I_18_I]], undef
> -; CHECK-NEXT:    [[ADD_I_20_I:%.*]] = add i8 [[ADD_I_19_I]], undef
> -; CHECK-NEXT:    [[ADD_I_21_I:%.*]] = add i8 [[ADD_I_20_I]], undef
> -; CHECK-NEXT:    [[ADD_I_22_I:%.*]] = add i8 [[ADD_I_21_I]], undef
> -; CHECK-NEXT:    [[ADD_I_23_I:%.*]] = add i8 [[ADD_I_22_I]], undef
> -; CHECK-NEXT:    [[ADD_I_24_I:%.*]] = add i8 [[ADD_I_23_I]], undef
> -; CHECK-NEXT:    [[ADD_I_25_I:%.*]] = add i8 [[ADD_I_24_I]], undef
> -; CHECK-NEXT:    [[ADD_I_26_I:%.*]] = add i8 [[ADD_I_25_I]], undef
> -; CHECK-NEXT:    [[ADD_I_27_I:%.*]] = add i8 [[ADD_I_26_I]], undef
> -; CHECK-NEXT:    [[ADD_I_28_I:%.*]] = add i8 [[ADD_I_27_I]], undef
> -; CHECK-NEXT:    [[ADD_I_29_I:%.*]] = add i8 [[ADD_I_28_I]], undef
> -; CHECK-NEXT:    [[ADD_I_30_I:%.*]] = add i8 [[ADD_I_29_I]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i8> [[TMP0]], <32
> x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21,
> i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32
> 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <32 x i8> [[TMP0]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x i8> [[BIN_RDX]],
> <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> @@ -46,7 +16,6 @@ define signext i8 @Foo(<32 x i8>* %__v)
>  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x i8>
> [[BIN_RDX6]], <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <32 x i8> [[BIN_RDX6]],
> [[RDX_SHUF7]]
>  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <32 x i8> [[BIN_RDX8]],
> i32 0
> -; CHECK-NEXT:    [[ADD_I_31_I:%.*]] = add i8 [[ADD_I_30_I]], undef
>  ; CHECK-NEXT:    ret i8 [[TMP1]]
>  ;
>  entry:
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll Mon
> Sep 23 09:25:03 2019
> @@ -35,13 +35,6 @@ define i32 @test(i32* nocapture readonly
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
> align 4
>  ; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], <i32 42, i32 42,
> i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
> -; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
> -; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
> -; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
> -; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
> -; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -50,7 +43,6 @@ define i32 @test(i32* nocapture readonly
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]]
> -; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>  ; CHECK:       for.end:
>  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
> @@ -138,13 +130,6 @@ define i32 @test2(i32* nocapture readonl
>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]],
> align 4
>  ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
> -; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
> -; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
> -; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
> -; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
> -; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -153,7 +138,6 @@ define i32 @test2(i32* nocapture readonl
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
> -; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>  ; CHECK:       for.end:
>  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
> @@ -258,13 +242,6 @@ define i32 @test3(i32* nocapture readonl
>  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
>  ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]],
> align 4
>  ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]],
> [[TMP3]]
> -; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
> -; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
> -; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
> -; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
> -; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
> -; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
> -; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -273,7 +250,6 @@ define i32 @test3(i32* nocapture readonl
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
> -; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>  ; CHECK:       for.end:
>  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
>
> Modified:
> llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll Mon
> Sep 23 09:25:03 2019
> @@ -26,12 +26,6 @@ define i32 @test_add(i32* nocapture read
>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
> [[P]], i64 7
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
> align 4
> -; CHECK-NEXT:    [[MUL_18:%.*]] = add i32 undef, undef
> -; CHECK-NEXT:    [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
> -; CHECK-NEXT:    [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
> -; CHECK-NEXT:    [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
> -; CHECK-NEXT:    [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
> -; CHECK-NEXT:    [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -39,7 +33,6 @@ define i32 @test_add(i32* nocapture read
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
>  ; CHECK-NEXT:    ret i32 [[TMP2]]
>  ;
>  entry:
> @@ -147,12 +140,6 @@ define i32 @test_and(i32* nocapture read
>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
> [[P]], i64 7
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
> align 4
> -; CHECK-NEXT:    [[MUL_18:%.*]] = and i32 undef, undef
> -; CHECK-NEXT:    [[MUL_29:%.*]] = and i32 undef, [[MUL_18]]
> -; CHECK-NEXT:    [[MUL_310:%.*]] = and i32 undef, [[MUL_29]]
> -; CHECK-NEXT:    [[MUL_411:%.*]] = and i32 undef, [[MUL_310]]
> -; CHECK-NEXT:    [[MUL_512:%.*]] = and i32 undef, [[MUL_411]]
> -; CHECK-NEXT:    [[MUL_613:%.*]] = and i32 undef, [[MUL_512]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -160,7 +147,6 @@ define i32 @test_and(i32* nocapture read
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[MUL_714:%.*]] = and i32 undef, [[MUL_613]]
>  ; CHECK-NEXT:    ret i32 [[TMP2]]
>  ;
>  entry:
> @@ -208,12 +194,6 @@ define i32 @test_or(i32* nocapture reado
>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
> [[P]], i64 7
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
> align 4
> -; CHECK-NEXT:    [[MUL_18:%.*]] = or i32 undef, undef
> -; CHECK-NEXT:    [[MUL_29:%.*]] = or i32 undef, [[MUL_18]]
> -; CHECK-NEXT:    [[MUL_310:%.*]] = or i32 undef, [[MUL_29]]
> -; CHECK-NEXT:    [[MUL_411:%.*]] = or i32 undef, [[MUL_310]]
> -; CHECK-NEXT:    [[MUL_512:%.*]] = or i32 undef, [[MUL_411]]
> -; CHECK-NEXT:    [[MUL_613:%.*]] = or i32 undef, [[MUL_512]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -221,7 +201,6 @@ define i32 @test_or(i32* nocapture reado
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[MUL_714:%.*]] = or i32 undef, [[MUL_613]]
>  ; CHECK-NEXT:    ret i32 [[TMP2]]
>  ;
>  entry:
> @@ -269,12 +248,6 @@ define i32 @test_xor(i32* nocapture read
>  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
> [[P]], i64 7
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
> align 4
> -; CHECK-NEXT:    [[MUL_18:%.*]] = xor i32 undef, undef
> -; CHECK-NEXT:    [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]]
> -; CHECK-NEXT:    [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]]
> -; CHECK-NEXT:    [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]]
> -; CHECK-NEXT:    [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]]
> -; CHECK-NEXT:    [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]],
> <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> @@ -282,7 +255,6 @@ define i32 @test_xor(i32* nocapture read
>  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]],
> [[RDX_SHUF3]]
>  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
> i32 0
> -; CHECK-NEXT:    [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]]
>  ; CHECK-NEXT:    ret i32 [[TMP2]]
>  ;
>  entry:
> @@ -322,15 +294,12 @@ define i32 @PR37731(<4 x i32>* noalias n
>  ; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i32> [[TMP4]], <i32 18, i32 2,
> i32 7, i32 13>
>  ; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]]
>  ; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16
> -; CHECK-NEXT:    [[TMP7:%.*]] = xor i32 undef, undef
> -; CHECK-NEXT:    [[TMP8:%.*]] = xor i32 [[TMP7]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4
> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <4 x i32> [[TMP6]], [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]],
> <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = xor <4 x i32> [[BIN_RDX]],
> [[RDX_SHUF1]]
> -; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
> i32 0
> -; CHECK-NEXT:    [[TMP10:%.*]] = xor i32 [[TMP8]], undef
> -; CHECK-NEXT:    ret i32 [[TMP9]]
> +; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
> i32 0
> +; CHECK-NEXT:    ret i32 [[TMP7]]
>  ;
>  entry:
>    %0 = load <4 x i32>, <4 x i32>* %self, align 16
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll Mon Sep
> 23 09:25:03 2019
> @@ -33,11 +33,8 @@ define i32 @foo(i32* %diff) #0 {
>  ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
>  ; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]],
> align 4
>  ; CHECK-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP9]]
> -; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 undef, [[A_088]]
>  ; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x
> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1
> -; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[ADD10]], undef
>  ; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x
> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2
> -; CHECK-NEXT:    [[ADD38:%.*]] = add nsw i32 [[ADD24]], undef
>  ; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x
> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3
>  ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
>  ; CHECK-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16
> @@ -47,7 +44,6 @@ define i32 @foo(i32* %diff) #0 {
>  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]],
> [[RDX_SHUF1]]
>  ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
> i32 0
>  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]]
> -; CHECK-NEXT:    [[ADD52:%.*]] = add nsw i32 [[ADD38]], undef
>  ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
>  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
>  ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label
> [[FOR_BODY]]
>
> Modified:
> llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
> Mon Sep 23 09:25:03 2019
> @@ -19,11 +19,6 @@ define void @hoge() {
>  ; CHECK-NEXT:    [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
>  ; CHECK-NEXT:    [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2
> x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
>  ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]], <i32 undef,
> i32 15, i32 31, i32 47>
> -; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 undef, undef
> -; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef
> -; CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32
> undef
> -; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4
> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP10:%.*]] = icmp sgt <4 x i32> [[TMP5]],
> [[RDX_SHUF9]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT11:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP10]], <4 x i32> [[TMP5]], <4 x i32> [[RDX_SHUF9]]
> @@ -31,28 +26,12 @@ define void @hoge() {
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP13:%.*]] = icmp sgt <4 x i32>
> [[RDX_MINMAX_SELECT11]], [[RDX_SHUF12]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT14:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP13]], <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32>
> [[RDX_SHUF12]]
>  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32>
> [[RDX_MINMAX_SELECT14]], i32 0
> -; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32
> undef
>  ; CHECK-NEXT:    [[TMP19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef
>  ; CHECK-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], 63
>  ; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]]
>  ; CHECK-NEXT:    [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
>  ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x
> i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
>  ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49,
> i32 -33, i32 -33, i32 -17>
> -; CHECK-NEXT:    [[TMP26:%.*]] = icmp sgt i32 undef, undef
> -; CHECK-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef
> -; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i32 undef, i32
> [[TMP27]]
> -; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 undef, undef
> -; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP29]]
> -; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP29]], i32
> [[TMP32]]
> -; CHECK-NEXT:    [[TMP36:%.*]] = icmp sgt i32 undef, undef
> -; CHECK-NEXT:    [[TMP37:%.*]] = select i1 [[TMP36]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[TMP38:%.*]] = icmp sgt i32 [[TMP37]], [[TMP34]]
> -; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP34]], i32
> [[TMP37]]
> -; CHECK-NEXT:    [[TMP41:%.*]] = icmp sgt i32 undef, undef
> -; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], [[TMP39]]
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP9]], <4
> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP9]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP9]], <4 x i32> [[RDX_SHUF]]
> @@ -70,7 +49,6 @@ define void @hoge() {
>  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = select i1 [[TMP14]], i32
> [[OP_EXTRA5]], i32 undef
>  ; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef
>  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = select i1 [[TMP15]], i32
> [[OP_EXTRA6]], i32 undef
> -; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP39]], i32
> [[TMP42]]
>  ; CHECK-NEXT:    [[TMP45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA7]]
>  ; CHECK-NEXT:    unreachable
>  ;
>
> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll Mon Sep 23
> 09:25:03 2019
> @@ -16,15 +16,6 @@ define void @_Z2azv() local_unnamed_addr
>  ; CHECK-NEXT:    [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds
> %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76",
> %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1
>  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x
> i32>*
>  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
> align 4
> -; CHECK-NEXT:    [[CMP_I1_4:%.*]] = icmp slt i32 undef, undef
> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_4:%.*]] = select i1 [[CMP_I1_4]],
> i32 undef, i32 undef
> -; CHECK-NEXT:    [[CMP_I1_5:%.*]] = icmp slt i32
> [[DOTSROA_SPECULATED_4]], undef
> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_5:%.*]] = select i1 [[CMP_I1_5]],
> i32 undef, i32 [[DOTSROA_SPECULATED_4]]
> -; CHECK-NEXT:    [[CMP_I1_6:%.*]] = icmp slt i32
> [[DOTSROA_SPECULATED_5]], undef
> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_6:%.*]] = select i1 [[CMP_I1_6]],
> i32 undef, i32 [[DOTSROA_SPECULATED_5]]
> -; CHECK-NEXT:    [[CMP_I1_7:%.*]] = icmp slt i32
> [[DOTSROA_SPECULATED_6]], undef
> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_7:%.*]] = select i1 [[CMP_I1_7]],
> i32 undef, i32 [[DOTSROA_SPECULATED_6]]
> -; CHECK-NEXT:    [[CMP_I1_8:%.*]] = icmp slt i32 undef, undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP1]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP1]], <8 x i32> [[RDX_SHUF]]
> @@ -39,7 +30,6 @@ define void @_Z2azv() local_unnamed_addr
>  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32
> undef
>  ; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef
>  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = select i1 [[TMP4]], i32
> [[OP_EXTRA]], i32 undef
> -; CHECK-NEXT:    [[DOTSROA_SPECULATED_8:%.*]] = select i1 [[CMP_I1_8]],
> i32 undef, i32 undef
>  ; CHECK-NEXT:    [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32
> undef, i32 [[OP_EXTRA7]]
>  ; CHECK-NEXT:    [[CMP_I1_10:%.*]] = icmp slt i32
> [[DOTSROA_SPECULATED_9]], undef
>  ; CHECK-NEXT:    ret void
>
> Modified:
> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll?rev=372626&r1=372625&r2=372626&view=diff
>
> ==============================================================================
> ---
> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
> (original)
> +++
> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll Mon
> Sep 23 09:25:03 2019
> @@ -18,19 +18,6 @@ define i32 @foo(i32* nocapture readonly
>  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
> [[A7:%.*]], i32 6
>  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
> [[A8:%.*]], i32 7
>  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
> -; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
> -; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
> -; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32
> undef
> -; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
> -; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]],
> i32 undef
> -; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
> -; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]],
> i32 undef
> -; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
> -; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]],
> i32 undef
> -; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
> -; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]],
> i32 undef
> -; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
> @@ -41,7 +28,6 @@ define i32 @foo(i32* nocapture readonly
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
> [[RDX_SHUF4]]
>  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32>
> [[RDX_MINMAX_SELECT6]], i32 0
> -; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]],
> i32 undef
>  ; CHECK-NEXT:    ret i32 [[TMP11]]
>  ;
>  entry:
> @@ -92,19 +78,6 @@ define i32 @foo1(i32* nocapture readonly
>  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
> [[A7:%.*]], i32 6
>  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
> [[A8:%.*]], i32 7
>  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
> -; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
> -; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
> -; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32
> undef
> -; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
> -; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]],
> i32 undef
> -; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
> -; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]],
> i32 undef
> -; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
> -; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]],
> i32 undef
> -; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
> -; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]],
> i32 undef
> -; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
> @@ -115,7 +88,6 @@ define i32 @foo1(i32* nocapture readonly
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
> [[RDX_SHUF4]]
>  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32>
> [[RDX_MINMAX_SELECT6]], i32 0
> -; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]],
> i32 undef
>  ; CHECK-NEXT:    ret i32 [[TMP11]]
>  ;
>  entry:
> @@ -170,19 +142,6 @@ define i32 @foo2(i32* nocapture readonly
>  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
> [[A7:%.*]], i32 6
>  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
> [[A8:%.*]], i32 7
>  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
> -; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
> -; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
> -; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
> -; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32
> undef
> -; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
> -; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]],
> i32 undef
> -; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
> -; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]],
> i32 undef
> -; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
> -; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]],
> i32 undef
> -; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
> -; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]],
> i32 undef
> -; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8
> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
> i32 undef, i32 undef>
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
> [[RDX_SHUF]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
> @@ -193,7 +152,6 @@ define i32 @foo2(i32* nocapture readonly
>  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
> [[RDX_SHUF4]]
>  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32>
> [[RDX_MINMAX_SELECT6]], i32 0
> -; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]],
> i32 undef
>  ; CHECK-NEXT:    ret i32 [[TMP11]]
>  ;
>  entry:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>

