[llvm] r372626 - [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L) && "SCEVAddRecExpr operand is not loop-invariant!")
Jordan Rupprecht via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 15:10:44 PDT 2019
Temporarily reverted as r373019
On Thu, Sep 26, 2019 at 2:55 PM Jordan Rupprecht <rupprecht at google.com>
wrote:
> Looks like this causes some crashes now. On the unreduced C++ source we
> see it with -fexperimental-new-pass-manager, but on the reduced case that
> flag does not seem to matter. (Strange...)
>
> Anyway, the repro: $ clang -O3 -c reduced.ll
>
> Where reduced.ll is:
>
> ; ModuleID = 'reduced.ll'
> source_filename = "reduced.ll"
> target triple = "x86_64-unknown-linux-gnu"
>
> @k = external dso_local constant [8 x [4 x i32]], align 16
> @l = external dso_local global [366 x i32], align 16
>
> define void @n() {
> entry:
> %i = alloca i32, align 4
> %a = alloca i32, align 4
> %b = alloca i32, align 4
> %c = alloca i32, align 4
> %cb = alloca i32, align 4
> %cw = alloca i32, align 4
> %d = alloca i32, align 4
> %e = alloca i32, align 4
> br label %for.cond
>
> for.cond: ; preds = %for.end17,
> %entry
> %0 = load i32, i32* %i, align 4
> %add = add nsw i32 %0, -183
> store i32 %add, i32* %a, align 4
> store i32 0, i32* %c, align 4
> store i32 0, i32* %cb, align 4
> br label %for.cond3
>
> for.cond3: ; preds = %for.end,
> %for.cond
> %1 = load i32, i32* %cb, align 4
> %cmp4 = icmp slt i32 %1, 8
> br i1 %cmp4, label %for.body5, label %for.end17
>
> for.body5: ; preds = %for.cond3
> store i32 0, i32* %cw, align 4
> br label %for.cond6
>
> for.cond6: ; preds = %if.end14,
> %for.body5
> %2 = load i32, i32* %cw, align 4
> %cmp7 = icmp slt i32 %2, 4
> %3 = load i32, i32* %cb, align 4
> br i1 %cmp7, label %for.body8, label %for.end
>
> for.body8: ; preds = %for.cond6
> %g = sext i32 %3 to i64
> %arrayidx = getelementptr inbounds [8 x [4 x i32]], [8 x [4 x i32]]* @k,
> i64 0, i64 %g
> %4 = load i32, i32* %cw, align 4
> %f = sext i32 %4 to i64
> %h = getelementptr inbounds [4 x i32], [4 x i32]* %arrayidx, i64 0, i64
> %f
> %5 = load i32, i32* %h, align 4
> store i32 %5, i32* %d, align 4
> %6 = load i32, i32* %a, align 4
> %7 = load i32, i32* %d, align 4
> %sub = sub nsw i32 %6, %7
> %j = call i32 @abs(i32 %sub)
> store i32 %j, i32* %e, align 4
> %8 = load i32, i32* %e, align 4
> %9 = load i32, i32* %b, align 4
> %cmp12 = icmp slt i32 %8, %9
> br i1 %cmp12, label %if.then13, label %if.end14
>
> if.then13: ; preds = %for.body8
> %10 = load i32, i32* %cb, align 4
> store i32 %10, i32* %c, align 4
> %11 = load i32, i32* %e, align 4
> store i32 %11, i32* %b, align 4
> br label %if.end14
>
> if.end14: ; preds = %if.then13,
> %for.body8
> %12 = load i32, i32* %cw, align 4
> %inc = add nsw i32 %12, 1
> store i32 %inc, i32* %cw, align 4
> br label %for.cond6
>
> for.end: ; preds = %for.cond6
> %inc16 = add nsw i32 %3, 1
> store i32 %inc16, i32* %cb, align 4
> br label %for.cond3
>
> for.end17: ; preds = %for.cond3
> %13 = load i32, i32* %c, align 4
> %14 = load i32, i32* %i, align 4
> %g18 = sext i32 %14 to i64
> %k = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 %g18
> store i32 %13, i32* %k, align 4
> %15 = load i32, i32* %i, align 4
> %inc21 = add nsw i32 %15, 1
> store i32 %inc21, i32* %i, align 4
> br label %for.cond
> }
>
> declare i32 @abs(i32)
>
> =>
>
> Stack dump:
> 0. Program arguments: /src/llvm-build/dev/bin/clang-10 -cc1 -triple
> x86_64-unknown-linux-gnu -emit-obj -disable-free -disable-llvm-verifier
> -discard-value-names -main-file-name reduced.ll -mrelocation-model static
> -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose
> -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64
> -dwarf-column-info -debugger-tuning=gdb -coverage-notes-file
> /tmp/crash/reduced.gcno -resource-dir /src/llvm-build/dev/lib/clang/10.0.0
> -O3 -fdebug-compilation-dir /tmp/crash -ferror-limit 19 -fmessage-length 0
> -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops
> -vectorize-slp -faddrsig -o reduced.o -x ir reduced.ll
> 1. Per-module optimization passes
> 2. Running pass 'Function Pass Manager' on module 'reduced.ll'.
> 3. Running pass 'Combine redundant instructions' on function '@n'
> #0 0x00007fd9935ae474 llvm::sys::PrintStackTrace(llvm::raw_ostream&)
> /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:532:13
>
> #1 0x00007fd9935ae474 PrintStackTraceSignalHandler(void*)
> /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:592:3
>
> #2 0x00007fd9935ac34e llvm::sys::RunSignalHandlers()
> /src/llvm-project/llvm/lib/Support/Signals.cpp:69:18
>
>
> #3 0x00007fd9935ae728 SignalHandler(int)
> /src/llvm-project/llvm/lib/Support/Unix/Signals.inc:384:1
> #4 0x00007fd992fa73a0 __restore_rt
> (/lib/x86_64-linux-gnu/libpthread.so.0+0x123a0)
> #5 0x00007fd993a9c4cc llvm::ConstantInt::classof(llvm::Value const*)
> /src/llvm-project/llvm/include/llvm/IR/Constants.h:256:28
>
> #6 0x00007fd993a9c4cc llvm::isa_impl<llvm::ConstantInt, llvm::Value,
> void>::doit(llvm::Value const&)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:58:12
>
> #7 0x00007fd993a9c4cc llvm::isa_impl_cl<llvm::ConstantInt, llvm::Value
> const*>::doit(llvm::Value const*)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:106:12
>
> #8 0x00007fd993a9c4cc llvm::isa_impl_wrap<llvm::ConstantInt, llvm::Value
> const*, llvm::Value const*>::doit(llvm::Value const* const&)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:132:12
> #9 0x00007fd993a9c4cc llvm::isa_impl_wrap<llvm::ConstantInt, llvm::Value*
> const, llvm::Value const*>::doit(llvm::Value* const&)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:122:12
>
> #10 0x00007fd993a9c4cc bool llvm::isa<llvm::ConstantInt,
> llvm::Value*>(llvm::Value* const&)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:142:10
>
> #11 0x00007fd993a9c4cc llvm::cast_retty<llvm::ConstantInt,
> llvm::Value*>::ret_type llvm::dyn_cast<llvm::ConstantInt,
> llvm::Value>(llvm::Value*)
> /src/llvm-project/llvm/include/llvm/Support/Casting.h:343:10
> #12 0x00007fd993a9c4cc llvm::InstCombiner::foldOrOfICmps(llvm::ICmpInst*,
> llvm::ICmpInst*, llvm::Instruction&)
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp:2148:23
>
> #13 0x00007fd993aa260e llvm::InstCombiner::visitOr(llvm::BinaryOperator&)
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp:2592:18
>
> #14 0x00007fd993a7f0d0 llvm::InstCombiner::run()
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3312:22
>
>
> #15 0x00007fd993a8013f combineInstructionsOverFunction(llvm::Function&,
> llvm::InstCombineWorklist&, llvm::AAResults*, llvm::AssumptionCache&,
> llvm::TargetLibraryInfo&, llvm::DominatorTree&,
> llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*,
> llvm::ProfileSummaryInfo*, bool, llvm::LoopInfo*)
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3546:13
>
> #16 0x00007fd993a81077
> llvm::InstructionCombiningPass::runOnFunction(llvm::Function&)
> /src/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:3620:10
>
> #17 0x00007fd993ef1816 llvm::FPPassManager::runOnFunction(llvm::Function&)
> /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1648:27
>
> #18 0x00007fd993ef1ad3 llvm::FPPassManager::runOnModule(llvm::Module&)
> /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1685:13
>
> #19 0x00007fd993ef2128 (anonymous
> namespace)::MPPassManager::runOnModule(llvm::Module&)
> /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1750:27
>
> #20 0x00007fd993ef2128 llvm::legacy::PassManagerImpl::run(llvm::Module&)
> /src/llvm-project/llvm/lib/IR/LegacyPassManager.cpp:1863:44
>
> #21 0x00007fd992767ac2 (anonymous
> namespace)::EmitAssemblyHelper::EmitAssembly(clang::BackendAction,
> std::unique_ptr<llvm::raw_pwrite_stream,
> std::default_delete<llvm::raw_pwrite_stream> >)
> /src/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:909:3
> #22 0x00007fd992767ac2 clang::EmitBackendOutput(clang::DiagnosticsEngine&,
> clang::HeaderSearchOptions const&, clang::CodeGenOptions const&,
> clang::TargetOptions const&, clang::LangOptions const&, llvm::DataLayout
> const&, llvm::Module*, clang::BackendAction,
> std::unique_ptr<llvm::raw_pwrite_stream,
> std::default_delete<llvm::raw_pwrite_stream> >)
> /src/llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1533:15
> #23 0x00007fd9929c4e4d std::unique_ptr<llvm::raw_pwrite_stream,
> std::default_delete<llvm::raw_pwrite_stream> >::~unique_ptr()
> /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/unique_ptr.h:273:6
>
> #24 0x00007fd9929c4e4d clang::CodeGenAction::ExecuteAction()
> /src/llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:1080:5
>
> #25 0x00007fd9923ee4a9 clang::FrontendAction::Execute()
> /src/llvm-project/clang/lib/Frontend/FrontendAction.cpp:939:10
>
>
> #26 0x00007fd99238de50 llvm::Error::getPtr() const
> /src/llvm-project/llvm/include/llvm/Support/Error.h:273:42
>
>
> #27 0x00007fd99238de50 llvm::Error::operator bool()
> /src/llvm-project/llvm/include/llvm/Support/Error.h:236:16
>
>
> #28 0x00007fd99238de50
> clang::CompilerInstance::ExecuteAction(clang::FrontendAction&)
> /src/llvm-project/clang/lib/Frontend/CompilerInstance.cpp:957:23
>
> #29 0x00007fd9922e952c
> clang::ExecuteCompilerInvocation(clang::CompilerInstance*)
> /src/llvm-project/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:290:25
>
> #30 0x0000000000213bfd cc1_main(llvm::ArrayRef<char const*>, char const*,
> void*) /src/llvm-project/clang/tools/driver/cc1_main.cpp:250:15
>
> #31 0x0000000000211e3f ExecuteCC1Tool(llvm::ArrayRef<char const*>,
> llvm::StringRef) /src/llvm-project/clang/tools/driver/driver.cpp:309:12
>
> #32 0x0000000000211e3f main
> /src/llvm-project/clang/tools/driver/driver.cpp:382:12
>
> On Mon, Sep 23, 2019 at 9:23 AM Alexey Bataev via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: abataev
>> Date: Mon Sep 23 09:25:03 2019
>> New Revision: 372626
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=372626&view=rev
>> Log:
>> [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L)
>> && "SCEVAddRecExpr operand is not loop-invariant!")
>>
>> Summary:
>> Initially the SLP vectorizer replaced all going-to-be-vectorized
>> instructions with Undef values. This may break ScalarEvolution and may
>> cause a crash.
>> Reworked the SLP vectorizer so that it does not replace vectorized
>> instructions with UndefValue anymore. Instead, vectorized instructions are
>> marked for deletion inside the BoUpSLP class and deleted upon class
>> destruction.
>>
>> Reviewers: mzolotukhin, mkuper, hfinkel, RKSimon, davide, spatel
>>
>> Subscribers: RKSimon, Gerolf, anemet, hans, majnemer, llvm-commits, sanjoy
>>
>> Differential Revision: https://reviews.llvm.org/D29641
>>
>> Added:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll
>> Modified:
>> llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
>> llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll
>>
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>>
>> Modified: llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
>> (original)
>> +++ llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h Mon Sep
>> 23 09:25:03 2019
>> @@ -24,7 +24,6 @@
>> #include "llvm/ADT/SmallVector.h"
>> #include "llvm/Analysis/AliasAnalysis.h"
>> #include "llvm/IR/PassManager.h"
>> -#include "llvm/IR/ValueHandle.h"
>>
>> namespace llvm {
>>
>> @@ -60,8 +59,8 @@ extern cl::opt<bool> RunSLPVectorization
>> struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
>> using StoreList = SmallVector<StoreInst *, 8>;
>> using StoreListMap = MapVector<Value *, StoreList>;
>> - using WeakTrackingVHList = SmallVector<WeakTrackingVH, 8>;
>> - using WeakTrackingVHListMap = MapVector<Value *, WeakTrackingVHList>;
>> + using GEPList = SmallVector<GetElementPtrInst *, 8>;
>> + using GEPListMap = MapVector<Value *, GEPList>;
>>
>> ScalarEvolution *SE = nullptr;
>> TargetTransformInfo *TTI = nullptr;
>> @@ -131,7 +130,7 @@ private:
>>
>> /// Tries to vectorize constructs started from CmpInst,
>> InsertValueInst or
>> /// InsertElementInst instructions.
>> - bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
>> + bool vectorizeSimpleInstructions(SmallVectorImpl<Instruction *>
>> &Instructions,
>> BasicBlock *BB,
>> slpvectorizer::BoUpSLP &R);
>>
>> /// Scan the basic block and look for patterns that are likely to start
>> @@ -147,7 +146,7 @@ private:
>> StoreListMap Stores;
>>
>> /// The getelementptr instructions in a basic block organized by base
>> pointer.
>> - WeakTrackingVHListMap GEPs;
>> + GEPListMap GEPs;
>> };
>>
>> } // end namespace llvm
>>
>> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Mon Sep 23
>> 09:25:03 2019
>> @@ -1121,6 +1121,14 @@ public:
>> #endif
>> };
>>
>> + /// Checks if the instruction is marked for deletion.
>> + bool isDeleted(Instruction *I) const { return
>> DeletedInstructions.count(I); }
>> +
>> + /// Marks values for later deletion.
>> + void eraseInstructions(ArrayRef<Value *> AV);
>> +
>> + ~BoUpSLP();
>> +
>> private:
>> /// Checks if all users of \p I are the part of the vectorization tree.
>> bool areAllUsersVectorized(Instruction *I) const;
>> @@ -1491,14 +1499,12 @@ private:
>> /// AliasCache, which can happen if a new instruction is allocated at
>> the
>> /// same address as a previously deleted instruction.
>> void eraseInstruction(Instruction *I) {
>> - I->removeFromParent();
>> - I->dropAllReferences();
>> - DeletedInstructions.emplace_back(I);
>> + DeletedInstructions.insert(I);
>> }
>>
>> /// Temporary store for deleted instructions. Instructions will be
>> deleted
>> /// eventually when the BoUpSLP is destructed.
>> - SmallVector<unique_value, 8> DeletedInstructions;
>> + SmallPtrSet<Instruction *, 8> DeletedInstructions;
>>
>> /// A list of values that need to extracted out of the tree.
>> /// This list holds pairs of (Internal Scalar : External User).
>> External User
>> @@ -2055,6 +2061,22 @@ template <> struct DOTGraphTraits<BoUpSL
>>
>> } // end namespace llvm
>>
>> +BoUpSLP::~BoUpSLP() {
>> + for (auto *I : DeletedInstructions)
>> + I->dropAllReferences();
>> + for (auto *I : DeletedInstructions) {
>> + assert(I->use_empty() && "trying to erase instruction with users.");
>> + I->eraseFromParent();
>> + }
>> +}
>> +
>> +void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
>> + for (auto *V : AV) {
>> + if (auto *I = dyn_cast<Instruction>(V))
>> + eraseInstruction(I);
>> + };
>> +}
>> +
>> void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
>> ArrayRef<Value *> UserIgnoreLst) {
>> ExtraValueToDebugLocsMap ExternallyUsedValues;
>> @@ -3541,7 +3563,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *>
>> // Generate the 'InsertElement' instruction.
>> for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
>> Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
>> - if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
>> + if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
>> GatherSeq.insert(Insrt);
>> CSEBlocks.insert(Insrt->getParent());
>>
>> @@ -4290,20 +4312,18 @@ BoUpSLP::vectorizeTree(ExtraValueToDebug
>> for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
>> Value *Scalar = Entry->Scalars[Lane];
>>
>> +#ifndef NDEBUG
>> Type *Ty = Scalar->getType();
>> if (!Ty->isVoidTy()) {
>> -#ifndef NDEBUG
>> for (User *U : Scalar->users()) {
>> LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
>>
>> - // It is legal to replace users in the ignorelist by undef.
>> + // It is legal to delete users in the ignorelist.
>> assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
>> - "Replacing out-of-tree value with undef");
>> + "Deleting out-of-tree value");
>> }
>> -#endif
>> - Value *Undef = UndefValue::get(Ty);
>> - Scalar->replaceAllUsesWith(Undef);
>> }
>> +#endif
>> LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
>> eraseInstruction(cast<Instruction>(Scalar));
>> }
>> @@ -4319,7 +4339,7 @@ void BoUpSLP::optimizeGatherSequence() {
>> << " gather sequences instructions.\n");
>> // LICM InsertElementInst sequences.
>> for (Instruction *I : GatherSeq) {
>> - if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
>> + if (isDeleted(I))
>> continue;
>>
>> // Check if this block is inside a loop.
>> @@ -4373,6 +4393,8 @@ void BoUpSLP::optimizeGatherSequence() {
>> // For all instructions in blocks containing gather sequences:
>> for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;)
>> {
>> Instruction *In = &*it++;
>> + if (isDeleted(In))
>> + continue;
>> if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
>> continue;
>>
>> @@ -5255,19 +5277,6 @@ bool SLPVectorizerPass::runImpl(Function
>> return Changed;
>> }
>>
>> -/// Check that the Values in the slice in VL array are still existent in
>> -/// the WeakTrackingVH array.
>> -/// Vectorization of part of the VL array may cause later values in the
>> VL array
>> -/// to become invalid. We track when this has happened in the
>> WeakTrackingVH
>> -/// array.
>> -static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
>> - ArrayRef<WeakTrackingVH> VH, unsigned
>> SliceBegin,
>> - unsigned SliceSize) {
>> - VL = VL.slice(SliceBegin, SliceSize);
>> - VH = VH.slice(SliceBegin, SliceSize);
>> - return !std::equal(VL.begin(), VL.end(), VH.begin());
>> -}
>> -
>> bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
>> BoUpSLP &R,
>> unsigned VecRegSize) {
>> const unsigned ChainLen = Chain.size();
>> @@ -5279,20 +5288,20 @@ bool SLPVectorizerPass::vectorizeStoreCh
>> if (!isPowerOf2_32(Sz) || VF < 2)
>> return false;
>>
>> - // Keep track of values that were deleted by vectorizing in the loop
>> below.
>> - const SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(),
>> Chain.end());
>> -
>> bool Changed = false;
>> // Look for profitable vectorizable trees at all offsets, starting at
>> zero.
>> for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) {
>>
>> + ArrayRef<Value *> Operands = Chain.slice(i, VF);
>> // Check that a previous iteration of this loop did not delete the
>> Value.
>> - if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
>> + if (llvm::any_of(Operands, [&R](Value *V) {
>> + auto *I = dyn_cast<Instruction>(V);
>> + return I && R.isDeleted(I);
>> + }))
>> continue;
>>
>> LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset "
>> << i
>> << "\n");
>> - ArrayRef<Value *> Operands = Chain.slice(i, VF);
>>
>> R.buildTree(Operands);
>> if (R.isTreeTinyAndNotFullyVectorizable())
>> @@ -5484,9 +5493,6 @@ bool SLPVectorizerPass::tryToVectorizeLi
>> bool CandidateFound = false;
>> int MinCost = SLPCostThreshold;
>>
>> - // Keep track of values that were deleted by vectorizing in the loop
>> below.
>> - SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
>> -
>> unsigned NextInst = 0, MaxInst = VL.size();
>> for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /=
>> 2) {
>> // No actual vectorization should happen, if number of parts is the
>> same as
>> @@ -5506,13 +5512,16 @@ bool SLPVectorizerPass::tryToVectorizeLi
>> if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
>> break;
>>
>> + ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
>> // Check that a previous iteration of this loop did not delete the
>> Value.
>> - if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
>> + if (llvm::any_of(Ops, [&R](Value *V) {
>> + auto *I = dyn_cast<Instruction>(V);
>> + return I && R.isDeleted(I);
>> + }))
>> continue;
>>
>> LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations
>> "
>> << "\n");
>> - ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
>>
>> R.buildTree(Ops);
>> Optional<ArrayRef<unsigned>> Order = R.bestOrder();
>> @@ -5733,23 +5742,23 @@ class HorizontalReduction {
>> case RK_Min:
>> Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS,
>> RHS)
>> : Builder.CreateFCmpOLT(LHS,
>> RHS);
>> - break;
>> + return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>> case RK_Max:
>> Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS,
>> RHS)
>> : Builder.CreateFCmpOGT(LHS,
>> RHS);
>> - break;
>> + return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>> case RK_UMin:
>> assert(Opcode == Instruction::ICmp && "Expected integer types.");
>> Cmp = Builder.CreateICmpULT(LHS, RHS);
>> - break;
>> + return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>> case RK_UMax:
>> assert(Opcode == Instruction::ICmp && "Expected integer types.");
>> Cmp = Builder.CreateICmpUGT(LHS, RHS);
>> - break;
>> + return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>> case RK_None:
>> - llvm_unreachable("Unknown reduction operation.");
>> + break;
>> }
>> - return Builder.CreateSelect(Cmp, LHS, RHS, Name);
>> + llvm_unreachable("Unknown reduction operation.");
>> }
>>
>> public:
>> @@ -6429,6 +6438,9 @@ public:
>> }
>> // Update users.
>> ReductionRoot->replaceAllUsesWith(VectorizedTree);
>> + // Mark all scalar reduction ops for deletion, they are replaced
>> by the
>> + // vector reductions.
>> + V.eraseInstructions(IgnoreList);
>> }
>> return VectorizedTree != nullptr;
>> }
>> @@ -6683,18 +6695,13 @@ static bool tryToVectorizeHorReductionOr
>> // horizontal reduction.
>> // Interrupt the process if the Root instruction itself was vectorized
>> or all
>> // sub-trees not higher that RecursionMaxDepth were
>> analyzed/vectorized.
>> - SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root,
>> 0});
>> + SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
>> SmallPtrSet<Value *, 8> VisitedInstrs;
>> bool Res = false;
>> while (!Stack.empty()) {
>> - Value *V;
>> + Instruction *Inst;
>> unsigned Level;
>> - std::tie(V, Level) = Stack.pop_back_val();
>> - if (!V)
>> - continue;
>> - auto *Inst = dyn_cast<Instruction>(V);
>> - if (!Inst)
>> - continue;
>> + std::tie(Inst, Level) = Stack.pop_back_val();
>> auto *BI = dyn_cast<BinaryOperator>(Inst);
>> auto *SI = dyn_cast<SelectInst>(Inst);
>> if (BI || SI) {
>> @@ -6735,8 +6742,8 @@ static bool tryToVectorizeHorReductionOr
>> for (auto *Op : Inst->operand_values())
>> if (VisitedInstrs.insert(Op).second)
>> if (auto *I = dyn_cast<Instruction>(Op))
>> - if (!isa<PHINode>(I) && I->getParent() == BB)
>> - Stack.emplace_back(Op, Level);
>> + if (!isa<PHINode>(I) && !R.isDeleted(I) && I->getParent() ==
>> BB)
>> + Stack.emplace_back(I, Level);
>> }
>> return Res;
>> }
>> @@ -6805,11 +6812,10 @@ bool SLPVectorizerPass::vectorizeCmpInst
>> }
>>
>> bool SLPVectorizerPass::vectorizeSimpleInstructions(
>> - SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
>> + SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB,
>> BoUpSLP &R) {
>> bool OpsChanged = false;
>> - for (auto &VH : reverse(Instructions)) {
>> - auto *I = dyn_cast_or_null<Instruction>(VH);
>> - if (!I)
>> + for (auto *I : reverse(Instructions)) {
>> + if (R.isDeleted(I))
>> continue;
>> if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
>> OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
>> @@ -6838,7 +6844,7 @@ bool SLPVectorizerPass::vectorizeChainsI
>> if (!P)
>> break;
>>
>> - if (!VisitedInstrs.count(P))
>> + if (!VisitedInstrs.count(P) && !R.isDeleted(P))
>> Incoming.push_back(P);
>> }
>>
>> @@ -6882,9 +6888,12 @@ bool SLPVectorizerPass::vectorizeChainsI
>>
>> VisitedInstrs.clear();
>>
>> - SmallVector<WeakVH, 8> PostProcessInstructions;
>> + SmallVector<Instruction *, 8> PostProcessInstructions;
>> SmallDenseSet<Instruction *, 4> KeyNodes;
>> for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;
>> ++it) {
>> + // Skip instructions marked for the deletion.
>> + if (R.isDeleted(&*it))
>> + continue;
>> // We may go through BB multiple times so skip the one we have
>> checked.
>> if (!VisitedInstrs.insert(&*it).second) {
>> if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
>> @@ -6977,10 +6986,10 @@ bool SLPVectorizerPass::vectorizeGEPIndi
>> SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
>>
>> // Some of the candidates may have already been vectorized after we
>> - // initially collected them. If so, the WeakTrackingVHs will have
>> - // nullified the
>> - // values, so remove them from the set of candidates.
>> - Candidates.remove(nullptr);
>> + // initially collected them. If so, they are marked as deleted, so
>> remove
>> + // them from the set of candidates.
>> + Candidates.remove_if(
>> + [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
>>
>> // Remove from the set of candidates all pairs of getelementptrs
>> with
>> // constant differences. Such getelementptrs are likely not good
>> @@ -6988,18 +6997,18 @@ bool SLPVectorizerPass::vectorizeGEPIndi
>> // computed from the other. We also ensure all candidate
>> getelementptr
>> // indices are unique.
>> for (int I = 0, E = GEPList.size(); I < E && Candidates.size() >
>> 1; ++I) {
>> - auto *GEPI = cast<GetElementPtrInst>(GEPList[I]);
>> + auto *GEPI = GEPList[I];
>> if (!Candidates.count(GEPI))
>> continue;
>> auto *SCEVI = SE->getSCEV(GEPList[I]);
>> for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
>> - auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]);
>> + auto *GEPJ = GEPList[J];
>> auto *SCEVJ = SE->getSCEV(GEPList[J]);
>> if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
>> - Candidates.remove(GEPList[I]);
>> - Candidates.remove(GEPList[J]);
>> + Candidates.remove(GEPI);
>> + Candidates.remove(GEPJ);
>> } else if (GEPI->idx_begin()->get() ==
>> GEPJ->idx_begin()->get()) {
>> - Candidates.remove(GEPList[J]);
>> + Candidates.remove(GEPJ);
>> }
>> }
>> }
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -17,16 +17,8 @@ define void @PR28330(i32 %n) {
>> ; DEFAULT: for.body:
>> ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]],
>> [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
>> ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
>> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
>> -80, i32 -80>
>> -; DEFAULT-NEXT: [[P20:%.*]] = add i32 [[P17]], undef
>> -; DEFAULT-NEXT: [[P22:%.*]] = add i32 [[P20]], undef
>> -; DEFAULT-NEXT: [[P24:%.*]] = add i32 [[P22]], undef
>> -; DEFAULT-NEXT: [[P26:%.*]] = add i32 [[P24]], undef
>> -; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef
>> -; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef
>> -; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef
>> ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
>> ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
>> -; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef
>> ; DEFAULT-NEXT: br label [[FOR_BODY]]
>> ;
>> ; GATHER-LABEL: @PR28330(
>> @@ -36,37 +28,30 @@ define void @PR28330(i32 %n) {
>> ; GATHER-NEXT: br label [[FOR_BODY:%.*]]
>> ; GATHER: for.body:
>> ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]]
>> ], [ 0, [[ENTRY:%.*]] ]
>> -; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
>> -; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1
>> [[TMP2]], i32 0
>> -; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
>> -; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1
>> [[TMP4]], i32 1
>> -; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
>> -; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1
>> [[TMP6]], i32 2
>> -; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
>> -; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1
>> [[TMP8]], i32 3
>> -; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
>> -; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1
>> [[TMP10]], i32 4
>> -; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
>> -; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1
>> [[TMP12]], i32 5
>> -; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
>> -; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1
>> [[TMP14]], i32 6
>> -; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
>> -; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1
>> [[TMP16]], i32 7
>> +; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
>> +; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
>> +; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1
>> [[TMP3]], i32 0
>> +; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
>> +; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1
>> [[TMP5]], i32 1
>> +; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
>> +; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1
>> [[TMP7]], i32 2
>> +; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
>> +; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1
>> [[TMP9]], i32 3
>> +; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
>> +; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1
>> [[TMP11]], i32 4
>> +; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
>> +; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1
>> [[TMP13]], i32 5
>> +; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
>> +; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1
>> [[TMP15]], i32 6
>> +; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1
>> [[TMP2]], i32 7
>> ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
>> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
>> -80, i32 -80>
>> ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 0
>> -; GATHER-NEXT: [[P20:%.*]] = add i32 [[P17]], [[TMP19]]
>> ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 1
>> -; GATHER-NEXT: [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
>> ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 2
>> -; GATHER-NEXT: [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
>> ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 3
>> -; GATHER-NEXT: [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
>> ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 4
>> -; GATHER-NEXT: [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
>> ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 5
>> -; GATHER-NEXT: [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
>> ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 6
>> -; GATHER-NEXT: [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
>> ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32
>> [[TMP19]], i32 0
>> ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32
>> [[TMP20]], i32 1
>> ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32
>> [[TMP21]], i32 2
>> @@ -78,7 +63,6 @@ define void @PR28330(i32 %n) {
>> ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32
>> [[TMP33]], i32 7
>> ; GATHER-NEXT: [[TMP35:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
>> ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]]
>> -; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
>> ; GATHER-NEXT: br label [[FOR_BODY]]
>> ;
>> ; MAX-COST-LABEL: @PR28330(
>> @@ -169,16 +153,8 @@ define void @PR32038(i32 %n) {
>> ; DEFAULT: for.body:
>> ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]],
>> [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
>> ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
>> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
>> -80, i32 -80>
>> -; DEFAULT-NEXT: [[P20:%.*]] = add i32 -5, undef
>> -; DEFAULT-NEXT: [[P22:%.*]] = add i32 [[P20]], undef
>> -; DEFAULT-NEXT: [[P24:%.*]] = add i32 [[P22]], undef
>> -; DEFAULT-NEXT: [[P26:%.*]] = add i32 [[P24]], undef
>> -; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef
>> -; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef
>> -; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef
>> ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
>> ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
>> -; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef
>> ; DEFAULT-NEXT: br label [[FOR_BODY]]
>> ;
>> ; GATHER-LABEL: @PR32038(
>> @@ -188,37 +164,30 @@ define void @PR32038(i32 %n) {
>> ; GATHER-NEXT: br label [[FOR_BODY:%.*]]
>> ; GATHER: for.body:
>> ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]]
>> ], [ 0, [[ENTRY:%.*]] ]
>> -; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
>> -; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1
>> [[TMP2]], i32 0
>> -; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
>> -; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1
>> [[TMP4]], i32 1
>> -; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
>> -; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1
>> [[TMP6]], i32 2
>> -; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
>> -; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1
>> [[TMP8]], i32 3
>> -; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
>> -; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1
>> [[TMP10]], i32 4
>> -; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
>> -; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1
>> [[TMP12]], i32 5
>> -; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
>> -; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1
>> [[TMP14]], i32 6
>> -; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
>> -; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1
>> [[TMP16]], i32 7
>> +; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
>> +; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
>> +; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1
>> [[TMP3]], i32 0
>> +; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
>> +; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1
>> [[TMP5]], i32 1
>> +; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
>> +; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1
>> [[TMP7]], i32 2
>> +; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
>> +; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1
>> [[TMP9]], i32 3
>> +; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
>> +; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1
>> [[TMP11]], i32 4
>> +; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
>> +; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1
>> [[TMP13]], i32 5
>> +; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
>> +; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1
>> [[TMP15]], i32 6
>> +; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1
>> [[TMP2]], i32 7
>> ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32
>> -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32
>> -80, i32 -80>
>> ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 0
>> -; GATHER-NEXT: [[P20:%.*]] = add i32 -5, [[TMP19]]
>> ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 1
>> -; GATHER-NEXT: [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
>> ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 2
>> -; GATHER-NEXT: [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
>> ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 3
>> -; GATHER-NEXT: [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
>> ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 4
>> -; GATHER-NEXT: [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
>> ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 5
>> -; GATHER-NEXT: [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
>> ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]],
>> i32 6
>> -; GATHER-NEXT: [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
>> ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32
>> [[TMP19]], i32 0
>> ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32
>> [[TMP20]], i32 1
>> ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32
>> [[TMP21]], i32 2
>> @@ -230,7 +199,6 @@ define void @PR32038(i32 %n) {
>> ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32
>> [[TMP33]], i32 7
>> ; GATHER-NEXT: [[TMP35:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
>> ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5
>> -; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
>> ; GATHER-NEXT: br label [[FOR_BODY]]
>> ;
>> ; MAX-COST-LABEL: @PR32038(
>> @@ -259,18 +227,12 @@ define void @PR32038(i32 %n) {
>> ; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1
>> [[P5]], i32 2
>> ; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1
>> [[P7]], i32 3
>> ; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32>
>> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32
>> -80, i32 -80>
>> -; MAX-COST-NEXT: [[P20:%.*]] = add i32 -5, undef
>> -; MAX-COST-NEXT: [[P22:%.*]] = add i32 [[P20]], undef
>> -; MAX-COST-NEXT: [[P24:%.*]] = add i32 [[P22]], undef
>> -; MAX-COST-NEXT: [[P26:%.*]] = add i32 [[P24]], undef
>> ; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
>> -; MAX-COST-NEXT: [[P28:%.*]] = add i32 [[P26]], [[P27]]
>> ; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
>> ; MAX-COST-NEXT: [[TMP9:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
>> ; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]]
>> ; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]]
>> ; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5
>> -; MAX-COST-NEXT: [[P30:%.*]] = add i32 [[P28]], [[P29]]
>> ; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
>> ; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
>> ; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontal.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -46,12 +46,8 @@ define i32 @test_select(i32* noalias noc
>> ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]],
>> zeroinitializer
>> ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer,
>> [[TMP4]]
>> ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32>
>> [[TMP6]], <4 x i32> [[TMP4]]
>> -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_026]]
>> -; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD]], undef
>> -; CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef
>> ; CHECK-NEXT: [[TMP8:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
>> ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
>> -; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef
>> ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32*
>> [[P1_023]], i64 [[IDX_EXT]]
>> ; CHECK-NEXT: [[ADD_PTR29]] = getelementptr inbounds i32, i32*
>> [[P2_024]], i64 [[IDX_EXT]]
>> ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_025]], 1
>> @@ -173,12 +169,8 @@ define i32 @reduction_with_br(i32* noali
>> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>*
>> ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]],
>> align 4
>> ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
>> -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_020]]
>> -; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], undef
>> -; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef
>> ; CHECK-NEXT: [[TMP5:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
>> ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
>> -; CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef
>> ; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
>> ; CHECK-NEXT: br i1 [[CMP14]], label [[IF_END]], label
>> [[FOR_END_LOOPEXIT:%.*]]
>> ; CHECK: if.end:
>> @@ -293,16 +285,8 @@ define i32 @test_unrolled_select(i8* noa
>> ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]],
>> zeroinitializer
>> ; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer,
>> [[TMP6]]
>> ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32>
>> [[TMP8]], <8 x i32> [[TMP6]]
>> -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_047]]
>> -; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD]], undef
>> -; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD16]], undef
>> -; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD27]], undef
>> -; CHECK-NEXT: [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef
>> -; CHECK-NEXT: [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef
>> -; CHECK-NEXT: [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef
>> ; CHECK-NEXT: [[TMP10:%.*]] = call i32
>> @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
>> ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
>> -; CHECK-NEXT: [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef
>> ; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
>> ; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label
>> [[FOR_END_LOOPEXIT:%.*]]
>> ; CHECK: if.end.86:
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -13,11 +13,11 @@ define void @patatino(i64 %n, i64 %i, %s
>> ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[I:%.*]],
>> metadata !19, metadata !DIExpression()), !dbg !24
>> ; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.S*
>> [[P:%.*]], metadata !20, metadata !DIExpression()), !dbg !25
>> ; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]],
>> %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26
>> -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata
>> !21, metadata !DIExpression()), !dbg !27
>> +; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !21,
>> metadata !DIExpression()), !dbg !27
>> ; CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]],
>> %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg
>> !26
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]],
>> align 8, !dbg !26, !tbaa !29
>> -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata
>> !22, metadata !DIExpression()), !dbg !33
>> +; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !22,
>> metadata !DIExpression()), !dbg !33
>> ; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]],
>> %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34
>> ; CHECK-NEXT: [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]],
>> %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35
>> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg
>> !36
>>
>> Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll?rev=372626&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll (added)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR31847.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -0,0 +1,153 @@
>> +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
>> +; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s |
>> FileCheck %s
>> +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
>> +
>> +@shift = common local_unnamed_addr global [10 x i32] zeroinitializer,
>> align 4
>> +@data = common local_unnamed_addr global [10 x i8*] zeroinitializer,
>> align 4
>> +
>> +define void @flat(i32 %intensity) {
>> +; CHECK-LABEL: @flat(
>> +; CHECK-NEXT: entry:
>> +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds
>> ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
>> +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds
>> ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
>> +; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds
>> ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
>> +; CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds
>> ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
>> +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, [[TMP0]]
>> +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8*
>> [[TMP2]], i32 [[SHR]]
>> +; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 1, [[TMP1]]
>> +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8*
>> [[TMP3]], i32 [[SHR1]]
>> +; CHECK-NEXT: br label [[FOR_BODY:%.*]]
>> +; CHECK: for.cond.cleanup:
>> +; CHECK-NEXT: ret void
>> +; CHECK: for.body:
>> +; CHECK-NEXT: [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]]
>> ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
>> +; CHECK-NEXT: [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [
>> [[INC_1:%.*]], [[FOR_BODY]] ]
>> +; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
>> +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP4]] to i32
>> +; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[CONV]], -128
>> +; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
>> +; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP5]] to i32
>> +; CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128
>> +; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1
>> +; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i32 128, [[CONV]]
>> +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP5]], i32 [[SUB]], i32
>> [[SUB7]]
>> +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[SUB4]], -1
>> +; CHECK-NEXT: [[SUB12:%.*]] = sub nsw i32 128, [[CONV3]]
>> +; CHECK-NEXT: [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32
>> [[SUB12]]
>> +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]]
>> +; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]]
>> +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8*
>> [[D1_DATA_046]], i32 [[IDX_NEG]]
>> +; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1
>> +; CHECK-NEXT: [[CONV15:%.*]] = zext i8 [[TMP6]] to i32
>> +; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]],
>> [[INTENSITY:%.*]]
>> +; CHECK-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8
>> +; CHECK-NEXT: store i8 [[CONV17]], i8* [[ADD_PTR]], align 1
>> +; CHECK-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8*
>> [[D1_DATA_046]], i32 [[ADD]]
>> +; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1
>> +; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0
>> +; CHECK-NEXT: [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8
>> +; CHECK-NEXT: store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1
>> +; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8*
>> [[D1_DATA_046]], i32 [[TMP1]]
>> +; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
>> +; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32
>> +; CHECK-NEXT: [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128
>> +; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
>> +; CHECK-NEXT: [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32
>> +; CHECK-NEXT: [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128
>> +; CHECK-NEXT: [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1
>> +; CHECK-NEXT: [[SUB7_1:%.*]] = sub nsw i32 128, [[CONV_1]]
>> +; CHECK-NEXT: [[COND_1:%.*]] = select i1 [[CMP5_1]], i32 [[SUB_1]],
>> i32 [[SUB7_1]]
>> +; CHECK-NEXT: [[CMP8_1:%.*]] = icmp sgt i32 [[SUB4_1]], -1
>> +; CHECK-NEXT: [[SUB12_1:%.*]] = sub nsw i32 128, [[CONV3_1]]
>> +; CHECK-NEXT: [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32
>> [[SUB4_1]], i32 [[SUB12_1]]
>> +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]]
>> +; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]]
>> +; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8*
>> [[ADD_PTR23]], i32 [[IDX_NEG_1]]
>> +; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1
>> +; CHECK-NEXT: [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32
>> +; CHECK-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]],
>> [[INTENSITY]]
>> +; CHECK-NEXT: [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8
>> +; CHECK-NEXT: store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1
>> +; CHECK-NEXT: [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8*
>> [[ADD_PTR23]], i32 [[ADD_1]]
>> +; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1
>> +; CHECK-NEXT: [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0
>> +; CHECK-NEXT: [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8
>> +; CHECK-NEXT: store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1
>> +; CHECK-NEXT: [[ADD_PTR23_1]] = getelementptr inbounds i8, i8*
>> [[ADD_PTR23]], i32 [[TMP1]]
>> +; CHECK-NEXT: [[INC_1]] = add nsw i32 [[Y_045]], 2
>> +; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128
>> +; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]],
>> label [[FOR_BODY]]
>> +;
>> +entry:
>> + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]*
>> @shift, i32 0, i32 0), align 4
>> + %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]*
>> @shift, i32 0, i32 1), align 4
>> + %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]*
>> @data, i32 0, i32 0), align 4
>> + %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]*
>> @data, i32 0, i32 1), align 4
>> + %shr = lshr i32 1, %0
>> + %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr
>> + %shr1 = lshr i32 1, %1
>> + %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1
>> + br label %for.body
>> +
>> +for.cond.cleanup: ; preds = %for.body
>> + ret void
>> +
>> +for.body: ; preds = %for.body,
>> %entry
>> + %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ]
>> + %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ]
>> + %4 = load i8, i8* %arrayidx, align 1
>> + %conv = zext i8 %4 to i32
>> + %sub = add nsw i32 %conv, -128
>> + %5 = load i8, i8* %arrayidx2, align 1
>> + %conv3 = zext i8 %5 to i32
>> + %sub4 = add nsw i32 %conv3, -128
>> + %cmp5 = icmp sgt i32 %sub, -1
>> + %sub7 = sub nsw i32 128, %conv
>> + %cond = select i1 %cmp5, i32 %sub, i32 %sub7
>> + %cmp8 = icmp sgt i32 %sub4, -1
>> + %sub12 = sub nsw i32 128, %conv3
>> + %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12
>> + %add = add nsw i32 %cond14, %cond
>> + %idx.neg = sub nsw i32 0, %add
>> + %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg
>> + %6 = load i8, i8* %add.ptr, align 1
>> + %conv15 = zext i8 %6 to i32
>> + %add16 = add nsw i32 %conv15, %intensity
>> + %conv17 = trunc i32 %add16 to i8
>> + store i8 %conv17, i8* %add.ptr, align 1
>> + %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add
>> + %7 = load i8, i8* %add.ptr18, align 1
>> + %not.tobool = icmp eq i8 %7, 0
>> + %conv21 = zext i1 %not.tobool to i8
>> + store i8 %conv21, i8* %add.ptr18, align 1
>> + %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1
>> + %8 = load i8, i8* %arrayidx, align 1
>> + %conv.1 = zext i8 %8 to i32
>> + %sub.1 = add nsw i32 %conv.1, -128
>> + %9 = load i8, i8* %arrayidx2, align 1
>> + %conv3.1 = zext i8 %9 to i32
>> + %sub4.1 = add nsw i32 %conv3.1, -128
>> + %cmp5.1 = icmp sgt i32 %sub.1, -1
>> + %sub7.1 = sub nsw i32 128, %conv.1
>> + %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1
>> + %cmp8.1 = icmp sgt i32 %sub4.1, -1
>> + %sub12.1 = sub nsw i32 128, %conv3.1
>> + %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1
>> + %add.1 = add nsw i32 %cond14.1, %cond.1
>> + %idx.neg.1 = sub nsw i32 0, %add.1
>> + %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1
>> + %10 = load i8, i8* %add.ptr.1, align 1
>> + %conv15.1 = zext i8 %10 to i32
>> + %add16.1 = add nsw i32 %conv15.1, %intensity
>> + %conv17.1 = trunc i32 %add16.1 to i8
>> + store i8 %conv17.1, i8* %add.ptr.1, align 1
>> + %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1
>> + %11 = load i8, i8* %add.ptr18.1, align 1
>> + %not.tobool.1 = icmp eq i8 %11, 0
>> + %conv21.1 = zext i1 %not.tobool.1 to i8
>> + store i8 %conv21.1, i8* %add.ptr18.1, align 1
>> + %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1
>> + %inc.1 = add nsw i32 %y.045, 2
>> + %exitcond.1 = icmp eq i32 %inc.1, 128
>> + br i1 %exitcond.1, label %for.cond.cleanup, label %for.body
>> +}
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -18,23 +18,16 @@ define void @mainTest(i32* %ptr) #0 {
>> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
>> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
>> ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
>> -; CHECK-NEXT: [[TMP9:%.*]] = add i32 1, undef
>> -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP7]]
>> -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], undef
>> -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP6]]
>> -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], undef
>> -; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
>> -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP5]]
>> +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
>> +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> +; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1
>> ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
>> ; CHECK-NEXT: [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
>> ; CHECK-NEXT: [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
>> -; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], undef
>> ; CHECK-NEXT: br label [[LOOP]]
>> ; CHECK: bail_out:
>> ; CHECK-NEXT: ret void
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -20,10 +20,6 @@ define void @test() #0 {
>> ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
>> ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64
>> 1>, [[TMP5]]
>> ; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32,
>> i64 32, i64 32, i64 32>
>> -; CHECK-NEXT: [[SUM1:%.*]] = add i64 undef, undef
>> -; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], undef
>> -; CHECK-NEXT: [[ZSUM:%.*]] = add i64 [[SUM2]], 0
>> -; CHECK-NEXT: [[JOIN:%.*]] = add i64 [[TMP6]], [[ZSUM]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4
>> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64>
>> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -31,7 +27,6 @@ define void @test() #0 {
>> ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
>> ; CHECK-NEXT: [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
>> -; CHECK-NEXT: [[LAST:%.*]] = add i64 [[JOIN]], undef
>> ; CHECK-NEXT: br label [[LOOP]]
>> ;
>> entry:
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -11,40 +11,6 @@ define void @Test(i32) {
>> ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2
>> x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
>> i32 1>
>> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]],
>> i32 1
>> ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32
>> 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
>> -; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
>> -; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
>> -; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
>> -; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
>> -; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
>> -; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
>> -; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
>> -; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -52,7 +18,7 @@ define void @Test(i32) {
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
>> +; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]]
>> ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
>> ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
>> ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
>> @@ -79,7 +45,6 @@ define void @Test(i32) {
>> ; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
>> ; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
>> ; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
>> -; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
>> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32
>> [[OP_EXTRA30]], i32 0
>> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32
>> 14910, i32 1
>> ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32
>> [[TMP2]], i32 0
>> @@ -101,40 +66,8 @@ define void @Test(i32) {
>> ; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32>
>> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
>> ; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32>
>> [[SHUFFLE]], i32 1
>> ; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]],
>> <i32 0, i32 55, i32 285, i32 1240>
>> -; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
>> -; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]],
>> [[TMP0:%.*]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
>> -; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
>> -; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
>> ; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
>> -; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]],
>> [[VAL_20]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
>> ; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
>> -; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]],
>> [[VAL_34]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
>> ; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32>
>> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]],
>> [[RDX_SHUF]]
>> ; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -142,7 +75,7 @@ define void @Test(i32) {
>> ; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32>
>> [[BIN_RDX2]], i32 0
>> ; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
>> ; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
>> -; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
>> +; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]],
>> [[TMP0:%.*]]
>> ; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]],
>> [[TMP0]]
>> ; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]],
>> [[TMP0]]
>> ; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]],
>> [[TMP0]]
>> @@ -170,7 +103,6 @@ define void @Test(i32) {
>> ; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]],
>> [[TMP0]]
>> ; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]],
>> [[TMP0]]
>> ; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]],
>> [[TMP2]]
>> -; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
>> ; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
>> ; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]],
>> [[VAL_39]]
>> ; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR40310.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -13,21 +13,6 @@ define void @mainTest(i32 %param, i32 *
>> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]],
>> i32 15
>> ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered,
>> align 4
>> ; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 15, i32
>> 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32
>> 4, i32 3, i32 2, i32 1, i32 -1>
>> -; CHECK-NEXT: [[V14:%.*]] = and i32 [[TMP2]], undef
>> -; CHECK-NEXT: [[V16:%.*]] = and i32 undef, [[V14]]
>> -; CHECK-NEXT: [[V18:%.*]] = and i32 undef, [[V16]]
>> -; CHECK-NEXT: [[V20:%.*]] = and i32 undef, [[V18]]
>> -; CHECK-NEXT: [[V22:%.*]] = and i32 undef, [[V20]]
>> -; CHECK-NEXT: [[V24:%.*]] = and i32 undef, [[V22]]
>> -; CHECK-NEXT: [[V26:%.*]] = and i32 undef, [[V24]]
>> -; CHECK-NEXT: [[V28:%.*]] = and i32 undef, [[V26]]
>> -; CHECK-NEXT: [[V30:%.*]] = and i32 undef, [[V28]]
>> -; CHECK-NEXT: [[V32:%.*]] = and i32 undef, [[V30]]
>> -; CHECK-NEXT: [[V34:%.*]] = and i32 undef, [[V32]]
>> -; CHECK-NEXT: [[V36:%.*]] = and i32 undef, [[V34]]
>> -; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]]
>> -; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]]
>> -; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]],
>> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
>> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32>
>> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -38,7 +23,6 @@ define void @mainTest(i32 %param, i32 *
>> ; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]],
>> [[RDX_SHUF5]]
>> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
>> -; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]]
>> ; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
>> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32
>> [[V44]], i32 0
>> ; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32
>> [[OP_EXTRA]], i32 1
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/bad-reduction.ll Mon Sep
>> 23 09:25:03 2019
>> @@ -30,12 +30,6 @@ define i64 @load_bswap(%v8i8* %p) {
>> ; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24
>> ; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16
>> ; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8
>> -; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef
>> -; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef
>> -; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef
>> -; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]]
>> -; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]]
>> -; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4
>> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64>
>> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -45,7 +39,6 @@ define i64 @load_bswap(%v8i8* %p) {
>> ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]]
>> ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]]
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]]
>> -; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]]
>> ; CHECK-NEXT: ret i64 [[OP_EXTRA]]
>> ;
>> %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
>> @@ -108,12 +101,6 @@ define i64 @load_bswap_nop_shift(%v8i8*
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align
>> 1
>> ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
>> ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 56, i64
>> 48, i64 40, i64 32, i64 24, i64 16, i64 8, i64 0>
>> -; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef
>> -; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef
>> -; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef
>> -; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], undef
>> -; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], undef
>> -; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8
>> x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64>
>> [[BIN_RDX]], <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -121,7 +108,6 @@ define i64 @load_bswap_nop_shift(%v8i8*
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64>
>> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], undef
>> ; CHECK-NEXT: ret i64 [[TMP5]]
>> ;
>> %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
>> @@ -196,12 +182,6 @@ define i64 @load64le(i8* %arg) {
>> ; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40
>> ; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48
>> ; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56
>> -; CHECK-NEXT: [[O1:%.*]] = or i64 undef, [[Z0]]
>> -; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef
>> -; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef
>> -; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef
>> -; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]]
>> -; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4
>> x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64>
>> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -211,7 +191,6 @@ define i64 @load64le(i8* %arg) {
>> ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]]
>> ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]]
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]]
>> -; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]]
>> ; CHECK-NEXT: ret i64 [[OP_EXTRA]]
>> ;
>> %g1 = getelementptr inbounds i8, i8* %arg, i64 1
>> @@ -272,12 +251,6 @@ define i64 @load64le_nop_shift(i8* %arg)
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align
>> 1
>> ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
>> ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 0, i64
>> 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56>
>> -; CHECK-NEXT: [[O1:%.*]] = or i64 undef, undef
>> -; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef
>> -; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef
>> -; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef
>> -; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], undef
>> -; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8
>> x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64>
>> [[BIN_RDX]], <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -285,7 +258,6 @@ define i64 @load64le_nop_shift(i8* %arg)
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64>
>> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], undef
>> ; CHECK-NEXT: ret i64 [[TMP5]]
>> ;
>> %g1 = getelementptr inbounds i8, i8* %arg, i64 1
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -100,16 +100,8 @@ define float @bazz() {
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast
>> ([20 x float]* @arr to <8 x float>*), align 16
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast
>> ([20 x float]* @arr1 to <8 x float>*), align 16
>> ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
>> -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
>> -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
>> ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
>> -; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
>> -; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
>> -; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
>> -; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -119,7 +111,6 @@ define float @bazz() {
>> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
>> ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV6]]
>> -; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
>> ; CHECK-NEXT: store float [[OP_EXTRA5]], float* @res, align 4
>> ; CHECK-NEXT: ret float [[OP_EXTRA5]]
>> ;
>> @@ -131,16 +122,8 @@ define float @bazz() {
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>*
>> bitcast ([20 x float]* @arr to <8 x float>*), align 16
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>*
>> bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
>> ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]],
>> [[TMP1]]
>> -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
>> ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
>> -; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
>> -; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
>> -; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
>> -; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float>
>> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -150,7 +133,6 @@ define float @bazz() {
>> ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <8 x float>
>> [[BIN_RDX4]], i32 0
>> ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]],
>> [[CONV]]
>> ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV6]]
>> -; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
>> ; THRESHOLD-NEXT: store float [[OP_EXTRA5]], float* @res, align 4
>> ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
>> ;
>> @@ -205,17 +187,14 @@ define float @bazzz() {
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr to <4 x float>*), align 16
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr1 to <4 x float>*), align 16
>> ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
>> -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
>> -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
>> -; CHECK-NEXT: store float [[TMP8]], float* @res, align 4
>> -; CHECK-NEXT: ret float [[TMP8]]
>> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
>> +; CHECK-NEXT: store float [[TMP5]], float* @res, align 4
>> +; CHECK-NEXT: ret float [[TMP5]]
>> ;
>> ; THRESHOLD-LABEL: @bazzz(
>> ; THRESHOLD-NEXT: entry:
>> @@ -224,17 +203,14 @@ define float @bazzz() {
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>> ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]],
>> [[TMP1]]
>> -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
>> -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float>
>> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float>
>> [[BIN_RDX]], [[RDX_SHUF1]]
>> -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float>
>> [[BIN_RDX2]], i32 0
>> -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
>> -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
>> -; THRESHOLD-NEXT: store float [[TMP8]], float* @res, align 4
>> -; THRESHOLD-NEXT: ret float [[TMP8]]
>> +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float>
>> [[BIN_RDX2]], i32 0
>> +; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
>> +; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4
>> +; THRESHOLD-NEXT: ret float [[TMP5]]
>> ;
>> entry:
>> %0 = load i32, i32* @n, align 4
>> @@ -267,16 +243,13 @@ define i32 @foo() {
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr to <4 x float>*), align 16
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr1 to <4 x float>*), align 16
>> ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
>> -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
>> -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
>> -; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
>> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
>> +; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
>> ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4
>> ; CHECK-NEXT: ret i32 [[CONV4]]
>> ;
>> @@ -287,16 +260,13 @@ define i32 @foo() {
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>> ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]],
>> [[TMP1]]
>> -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
>> -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float>
>> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float>
>> [[BIN_RDX]], [[RDX_SHUF1]]
>> -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float>
>> [[BIN_RDX2]], i32 0
>> -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
>> -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
>> -; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
>> +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float>
>> [[BIN_RDX2]], i32 0
>> +; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
>> +; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
>> ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4
>> ; THRESHOLD-NEXT: ret i32 [[CONV4]]
>> ;
>> @@ -330,11 +300,6 @@ define float @bar() {
>> ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr to <4 x float>*), align 16
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([20 x float]* @arr1 to <4 x float>*), align 16
>> ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
>> -; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef
>> -; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef,
>> float undef
>> -; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]],
>> undef
>> -; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float
>> [[MAX_0_MUL3]], float undef
>> -; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]],
>> undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float>
>> [[TMP2]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
>> @@ -342,7 +307,6 @@ define float @bar() {
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float>
>> [[RDX_SHUF1]]
>> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float
>> [[MAX_0_MUL3_1]], float undef
>> ; CHECK-NEXT: store float [[TMP3]], float* @res, align 4
>> ; CHECK-NEXT: ret float [[TMP3]]
>> ;
>> @@ -351,11 +315,6 @@ define float @bar() {
>> ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr to <4 x float>*), align 16
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>*
>> bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]],
>> [[TMP0]]
>> -; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef
>> -; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float
>> undef, float undef
>> -; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float
>> [[MAX_0_MUL3]], undef
>> -; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float
>> [[MAX_0_MUL3]], float undef
>> -; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float
>> [[MAX_0_MUL3_1]], undef
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float>
>> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float>
>> [[TMP2]], [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
>> @@ -363,7 +322,6 @@ define float @bar() {
>> ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>> ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float>
>> [[RDX_SHUF1]]
>> ; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float
>> [[MAX_0_MUL3_1]], float undef
>> ; THRESHOLD-NEXT: store float [[TMP3]], float* @res, align 4
>> ; THRESHOLD-NEXT: ret float [[TMP3]]
>> ;
>> @@ -410,21 +368,6 @@ define float @f(float* nocapture readonl
>> ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 15
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>*
>> [[TMP0]], align 4
>> -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 16
>> ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 17
>> ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 18
>> @@ -459,37 +402,6 @@ define float @f(float* nocapture readonl
>> ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 47
>> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x
>> float>*
>> ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>*
>> [[TMP2]], align 4
>> -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> -; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>> -; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>> -; CHECK-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
>> -; CHECK-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
>> -; CHECK-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
>> -; CHECK-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
>> -; CHECK-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
>> -; CHECK-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
>> -; CHECK-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
>> -; CHECK-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
>> -; CHECK-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
>> -; CHECK-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
>> -; CHECK-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
>> -; CHECK-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
>> -; CHECK-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
>> -; CHECK-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
>> -; CHECK-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]],
>> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
>> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -511,7 +423,6 @@ define float @f(float* nocapture readonl
>> ; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float>
>> [[BIN_RDX14]], [[RDX_SHUF15]]
>> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float>
>> [[BIN_RDX16]], i32 0
>> ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
>> -; CHECK-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
>> ; CHECK-NEXT: ret float [[OP_RDX]]
>> ;
>> ; THRESHOLD-LABEL: @f(
>> @@ -533,21 +444,6 @@ define float @f(float* nocapture readonl
>> ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 15
>> ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef
>> -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 16
>> ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 17
>> ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 18
>> @@ -582,37 +478,6 @@ define float @f(float* nocapture readonl
>> ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 47
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to
>> <32 x float>*
>> ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>*
>> [[TMP2]], align 4
>> -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> -; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>> -; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>> -; THRESHOLD-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
>> -; THRESHOLD-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
>> -; THRESHOLD-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
>> -; THRESHOLD-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
>> -; THRESHOLD-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
>> -; THRESHOLD-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
>> -; THRESHOLD-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
>> -; THRESHOLD-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
>> -; THRESHOLD-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
>> -; THRESHOLD-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
>> -; THRESHOLD-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
>> -; THRESHOLD-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
>> -; THRESHOLD-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
>> -; THRESHOLD-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
>> -; THRESHOLD-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float>
>> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19,
>> i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32
>> 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
>> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -634,7 +499,6 @@ define float @f(float* nocapture readonl
>> ; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float>
>> [[BIN_RDX14]], [[RDX_SHUF15]]
>> ; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <16 x float>
>> [[BIN_RDX16]], i32 0
>> ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
>> -; THRESHOLD-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
>> ; THRESHOLD-NEXT: ret float [[OP_RDX]]
>> ;
>> entry:
>> @@ -821,37 +685,6 @@ define float @f1(float* nocapture readon
>> ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 31
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>*
>> [[TMP0]], align 4
>> -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
>> -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> -; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]],
>> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
>> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -864,7 +697,6 @@ define float @f1(float* nocapture readon
>> ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]],
>> [[RDX_SHUF7]]
>> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float>
>> [[BIN_RDX8]], i32 0
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
>> -; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>> ; CHECK-NEXT: ret float [[OP_EXTRA]]
>> ;
>> ; THRESHOLD-LABEL: @f1(
>> @@ -904,37 +736,6 @@ define float @f1(float* nocapture readon
>> ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 31
>> ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> -; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float>
>> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19,
>> i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32
>> 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float>
>> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -947,7 +748,6 @@ define float @f1(float* nocapture readon
>> ; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float>
>> [[BIN_RDX6]], [[RDX_SHUF7]]
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <32 x float>
>> [[BIN_RDX8]], i32 0
>> ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]],
>> [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
>> ; THRESHOLD-NEXT: ret float [[OP_EXTRA]]
>> ;
>> entry:
>> @@ -1058,17 +858,12 @@ define float @loadadd31(float* nocapture
>> ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
>> ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 2
>> ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align
>> 4
>> -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
>> ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 3
>> ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 4
>> ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 5
>> ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 6
>> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x
>> float>*
>> ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]],
>> align 4
>> -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>> ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 8
>> ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 9
>> @@ -1079,14 +874,6 @@ define float @loadadd31(float* nocapture
>> ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 14
>> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x
>> float>*
>> ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
>> align 4
>> -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 15
>> ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 16
>> ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 17
>> @@ -1105,21 +892,6 @@ define float @loadadd31(float* nocapture
>> ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 30
>> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x
>> float>*
>> ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>*
>> [[TMP6]], align 4
>> -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]],
>> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
>> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
>> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1145,7 +917,6 @@ define float @loadadd31(float* nocapture
>> ; CHECK-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
>> ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
>> ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
>> -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> ; CHECK-NEXT: ret float [[TMP12]]
>> ;
>> ; THRESHOLD-LABEL: @loadadd31(
>> @@ -1154,17 +925,12 @@ define float @loadadd31(float* nocapture
>> ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]],
>> align 4
>> ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 2
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]],
>> align 4
>> -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
>> ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 3
>> ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 4
>> ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 5
>> ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 6
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4
>> x float>*
>> ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>*
>> [[TMP2]], align 4
>> -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>> ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 8
>> ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 9
>> @@ -1175,14 +941,6 @@ define float @loadadd31(float* nocapture
>> ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 14
>> ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8
>> x float>*
>> ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>*
>> [[TMP4]], align 4
>> -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 15
>> ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 16
>> ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 17
>> @@ -1201,21 +959,6 @@ define float @loadadd31(float* nocapture
>> ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 30
>> ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to
>> <16 x float>*
>> ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>*
>> [[TMP6]], align 4
>> -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
>> -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
>> -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
>> -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
>> -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
>> -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
>> -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
>> -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
>> -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
>> -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
>> -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
>> -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
>> -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
>> -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float>
>> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32
>> 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
>> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1241,7 +984,6 @@ define float @loadadd31(float* nocapture
>> ; THRESHOLD-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]],
>> [[TMP10]]
>> ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]],
>> [[TMP1]]
>> ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
>> -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
>> ; THRESHOLD-NEXT: ret float [[TMP12]]
>> ;
>> entry:
>> @@ -1352,14 +1094,6 @@ define float @extra_args(float* nocaptur
>> ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
>> -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
>> -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
>> -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
>> -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1369,7 +1103,6 @@ define float @extra_args(float* nocaptur
>> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>> ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV]]
>> -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>> ; CHECK-NEXT: ret float [[OP_EXTRA5]]
>> ;
>> ; THRESHOLD-LABEL: @extra_args(
>> @@ -1386,14 +1119,6 @@ define float @extra_args(float* nocaptur
>> ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>> ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
>> -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
>> -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
>> -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float>
>> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1403,7 +1128,6 @@ define float @extra_args(float* nocaptur
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float>
>> [[BIN_RDX4]], i32 0
>> ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>> ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>> ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
>> ;
>> entry:
>> @@ -1452,16 +1176,6 @@ define float @extra_args_same_several_ti
>> ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; CHECK-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
>> -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
>> -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
>> -; CHECK-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]],
>> 5.000000e+00
>> -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
>> -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
>> -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1473,7 +1187,6 @@ define float @extra_args_same_several_ti
>> ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> 5.000000e+00
>> ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]],
>> 5.000000e+00
>> ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]],
>> [[CONV]]
>> -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>> ; CHECK-NEXT: ret float [[OP_EXTRA7]]
>> ;
>> ; THRESHOLD-LABEL: @extra_args_same_several_times(
>> @@ -1490,16 +1203,6 @@ define float @extra_args_same_several_ti
>> ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>> ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; THRESHOLD-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]],
>> 5.000000e+00
>> -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
>> -; THRESHOLD-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]],
>> 5.000000e+00
>> -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
>> -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
>> -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float>
>> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1511,7 +1214,6 @@ define float @extra_args_same_several_ti
>> ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> 5.000000e+00
>> ; THRESHOLD-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]],
>> 5.000000e+00
>> ; THRESHOLD-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]],
>> [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>> ; THRESHOLD-NEXT: ret float [[OP_EXTRA7]]
>> ;
>> entry:
>> @@ -1564,14 +1266,6 @@ define float @extra_args_no_replace(floa
>> ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
>> -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
>> -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
>> -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
>> -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1581,7 +1275,6 @@ define float @extra_args_no_replace(floa
>> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>> ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV]]
>> -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>> ; CHECK-NEXT: ret float [[OP_EXTRA5]]
>> ;
>> ; THRESHOLD-LABEL: @extra_args_no_replace(
>> @@ -1600,14 +1293,6 @@ define float @extra_args_no_replace(floa
>> ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float,
>> float* [[X]], i64 7
>> ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
>> ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>*
>> [[TMP0]], align 4
>> -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
>> -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
>> -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
>> -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
>> -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
>> -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
>> -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float>
>> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1617,7 +1302,6 @@ define float @extra_args_no_replace(floa
>> ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float>
>> [[BIN_RDX4]], i32 0
>> ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
>> ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]],
>> [[CONV]]
>> -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
>> ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
>> ;
>> entry:
>> @@ -1668,10 +1352,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
>> ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]],
>> zeroinitializer
>> ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
>> -; CHECK-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef
>> -; CHECK-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef
>> -; CHECK-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef
>> -; CHECK-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]],
>> <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -1679,7 +1359,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
>> ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
>> -; CHECK-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
>> ; CHECK-NEXT: ret i32 [[OP_EXTRA3]]
>> ;
>> ; THRESHOLD-LABEL: @wobble(
>> @@ -1696,10 +1375,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>> ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]],
>> i32 3
>> ; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]],
>> zeroinitializer
>> ; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
>> -; THRESHOLD-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef
>> -; THRESHOLD-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef
>> -; THRESHOLD-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef
>> -; THRESHOLD-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef
>> ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32>
>> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]],
>> [[RDX_SHUF]]
>> ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> @@ -1707,7 +1382,6 @@ define i32 @wobble(i32 %arg, i32 %bar) {
>> ; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32>
>> [[BIN_RDX2]], i32 0
>> ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
>> ; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]],
>> [[TMP9]]
>> -; THRESHOLD-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
>> ; THRESHOLD-NEXT: ret i32 [[OP_EXTRA3]]
>> ;
>> bb:
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -12,19 +12,6 @@
>> define i32 @maxi8(i32) {
>> ; CHECK-LABEL: @maxi8(
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr to <8 x i32>*), align 16
>> -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
>> -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
>> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
>> undef
>> -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
>> -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
>> undef
>> -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]]
>> @@ -34,9 +21,8 @@ define i32 @maxi8(i32) {
>> ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32>
>> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
>> [[RDX_SHUF4]]
>> -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
>> undef
>> -; CHECK-NEXT: ret i32 [[TMP16]]
>> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> +; CHECK-NEXT: ret i32 [[TMP3]]
>> ;
>> %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>> %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 1), align 4
>> @@ -66,35 +52,6 @@ define i32 @maxi8(i32) {
>> define i32 @maxi16(i32) {
>> ; CHECK-LABEL: @maxi16(
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast
>> ([32 x i32]* @arr to <16 x i32>*), align 16
>> -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
>> -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
>> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
>> undef
>> -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
>> -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
>> undef
>> -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>> -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
>> undef
>> -; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
>> -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
>> undef
>> -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
>> -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
>> undef
>> -; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
>> -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> undef
>> -; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
>> -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> undef
>> -; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
>> -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
>> undef
>> -; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
>> -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
>> undef
>> -; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
>> -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
>> undef
>> -; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]],
>> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
>> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1>
>> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]]
>> @@ -107,9 +64,8 @@ define i32 @maxi16(i32) {
>> ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32>
>> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32>
>> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1>
>> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32>
>> [[RDX_SHUF7]]
>> -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32>
>> [[RDX_MINMAX_SELECT9]], i32 0
>> -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
>> undef
>> -; CHECK-NEXT: ret i32 [[TMP32]]
>> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32>
>> [[RDX_MINMAX_SELECT9]], i32 0
>> +; CHECK-NEXT: ret i32 [[TMP3]]
>> ;
>> %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>> %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 1), align 4
>> @@ -163,67 +119,6 @@ define i32 @maxi16(i32) {
>> define i32 @maxi32(i32) {
>> ; CHECK-LABEL: @maxi32(
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast
>> ([32 x i32]* @arr to <32 x i32>*), align 16
>> -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
>> -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
>> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32
>> undef
>> -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
>> -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32
>> undef
>> -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>> -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
>> undef
>> -; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
>> -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
>> undef
>> -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
>> -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
>> undef
>> -; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
>> -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> undef
>> -; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
>> -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> undef
>> -; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
>> -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
>> undef
>> -; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
>> -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
>> undef
>> -; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
>> -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
>> undef
>> -; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
>> -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
>> undef
>> -; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef
>> -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32
>> undef
>> -; CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef
>> -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32
>> undef
>> -; CHECK-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef
>> -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32
>> undef
>> -; CHECK-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef
>> -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32
>> undef
>> -; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef
>> -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32
>> undef
>> -; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef
>> -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32
>> undef
>> -; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef
>> -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32
>> undef
>> -; CHECK-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef
>> -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32
>> undef
>> -; CHECK-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef
>> -; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32
>> undef
>> -; CHECK-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef
>> -; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32
>> undef
>> -; CHECK-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef
>> -; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32
>> undef
>> -; CHECK-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef
>> -; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32
>> undef
>> -; CHECK-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef
>> -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32
>> undef
>> -; CHECK-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef
>> -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32
>> undef
>> -; CHECK-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef
>> -; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32
>> undef
>> -; CHECK-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]],
>> <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1>
>> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]]
>> @@ -239,9 +134,8 @@ define i32 @maxi32(i32) {
>> ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32>
>> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32>
>> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1>
>> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32>
>> [[RDX_SHUF10]]
>> -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i32>
>> [[RDX_MINMAX_SELECT12]], i32 0
>> -; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32
>> undef
>> -; CHECK-NEXT: ret i32 [[TMP64]]
>> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32>
>> [[RDX_MINMAX_SELECT12]], i32 0
>> +; CHECK-NEXT: ret i32 [[TMP3]]
>> ;
>> %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>> %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 1), align 4
>> @@ -343,19 +237,6 @@ define i32 @maxi32(i32) {
>> define float @maxf8(float) {
>> ; CHECK-LABEL: @maxf8(
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast
>> ([32 x float]* @arr1 to <8 x float>*), align 16
>> -; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
>> -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
>> undef
>> -; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
>> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]],
>> float undef
>> -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
>> -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]],
>> float undef
>> -; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
>> -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
>> float undef
>> -; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
>> -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
>> float undef
>> -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
>> -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
>> float undef
>> -; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float>
>> [[TMP2]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
>> @@ -365,9 +246,8 @@ define float @maxf8(float) {
>> ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float>
>> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float>
>> [[RDX_SHUF4]]
>> -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x float>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]],
>> float undef
>> -; CHECK-NEXT: ret float [[TMP16]]
>> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> +; CHECK-NEXT: ret float [[TMP3]]
>> ;
>> %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 0), align 16
>> %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 1), align 4
>> @@ -397,35 +277,6 @@ define float @maxf8(float) {
>> define float @maxf16(float) {
>> ; CHECK-LABEL: @maxf16(
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast
>> ([32 x float]* @arr1 to <16 x float>*), align 16
>> -; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
>> -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
>> undef
>> -; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
>> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]],
>> float undef
>> -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
>> -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]],
>> float undef
>> -; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
>> -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
>> float undef
>> -; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
>> -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
>> float undef
>> -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
>> -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
>> float undef
>> -; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
>> -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]],
>> float undef
>> -; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
>> -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]],
>> float undef
>> -; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
>> -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]],
>> float undef
>> -; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
>> -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]],
>> float undef
>> -; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
>> -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]],
>> float undef
>> -; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
>> -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]],
>> float undef
>> -; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
>> -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]],
>> float undef
>> -; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
>> -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]],
>> float undef
>> -; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]],
>> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
>> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float>
>> [[TMP2]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1>
>> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
>> @@ -438,9 +289,8 @@ define float @maxf16(float) {
>> ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float>
>> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> <i32 1, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float>
>> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1>
>> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float>
>> [[RDX_SHUF7]]
>> -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x float>
>> [[RDX_MINMAX_SELECT9]], i32 0
>> -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]],
>> float undef
>> -; CHECK-NEXT: ret float [[TMP32]]
>> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float>
>> [[RDX_MINMAX_SELECT9]], i32 0
>> +; CHECK-NEXT: ret float [[TMP3]]
>> ;
>> %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 0), align 16
>> %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 1), align 4
>> @@ -494,67 +344,6 @@ define float @maxf16(float) {
>> define float @maxf32(float) {
>> ; CHECK-LABEL: @maxf32(
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast
>> ([32 x float]* @arr1 to <32 x float>*), align 16
>> -; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
>> -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float
>> undef
>> -; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
>> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]],
>> float undef
>> -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
>> -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]],
>> float undef
>> -; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
>> -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]],
>> float undef
>> -; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
>> -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]],
>> float undef
>> -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
>> -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]],
>> float undef
>> -; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
>> -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]],
>> float undef
>> -; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
>> -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]],
>> float undef
>> -; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
>> -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]],
>> float undef
>> -; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
>> -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]],
>> float undef
>> -; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
>> -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]],
>> float undef
>> -; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
>> -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]],
>> float undef
>> -; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
>> -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]],
>> float undef
>> -; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
>> -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]],
>> float undef
>> -; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
>> -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]],
>> float undef
>> -; CHECK-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef
>> -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]],
>> float undef
>> -; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef
>> -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]],
>> float undef
>> -; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef
>> -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]],
>> float undef
>> -; CHECK-NEXT: [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef
>> -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]],
>> float undef
>> -; CHECK-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef
>> -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]],
>> float undef
>> -; CHECK-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef
>> -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]],
>> float undef
>> -; CHECK-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef
>> -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]],
>> float undef
>> -; CHECK-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef
>> -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]],
>> float undef
>> -; CHECK-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef
>> -; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]],
>> float undef
>> -; CHECK-NEXT: [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef
>> -; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]],
>> float undef
>> -; CHECK-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef
>> -; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]],
>> float undef
>> -; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef
>> -; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]],
>> float undef
>> -; CHECK-NEXT: [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef
>> -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]],
>> float undef
>> -; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef
>> -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]],
>> float undef
>> -; CHECK-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef
>> -; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]],
>> float undef
>> -; CHECK-NEXT: [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]],
>> <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float>
>> [[TMP2]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1>
>> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]]
>> @@ -570,9 +359,8 @@ define float @maxf32(float) {
>> ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float>
>> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> <i32 1, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float>
>> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1>
>> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float>
>> [[RDX_SHUF10]]
>> -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x float>
>> [[RDX_MINMAX_SELECT12]], i32 0
>> -; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]],
>> float undef
>> -; CHECK-NEXT: ret float [[TMP64]]
>> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x float>
>> [[RDX_MINMAX_SELECT12]], i32 0
>> +; CHECK-NEXT: ret float [[TMP3]]
>> ;
>> %2 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 0), align 16
>> %3 = load float, float* getelementptr inbounds ([32 x float], [32 x
>> float]* @arr1, i64 0, i64 1), align 4
>> @@ -678,34 +466,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>> ; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>> ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>> ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
>> -; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> +; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
>> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>> ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>> ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>> ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>> ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; SSE-NEXT: [[TMP17:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; SSE-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
>> -; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
>> [[TMP15]]
>> -; SSE-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
>> -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]],
>> i32 [[TMP5]]
>> -; SSE-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; SSE-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
>> -; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]],
>> i32 [[TMP22]]
>> -; SSE-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> -; SSE-NEXT: store i32 [[TMP25]], i32* @var, align 8
>> -; SSE-NEXT: ret i32 [[TMP24]]
>> +; SSE-NEXT: [[TMP8:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
>> +; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> [[TMP7]]
>> +; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
>> +; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]],
>> i32 [[TMP5]]
>> +; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
>> +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]],
>> i32 [[TMP12]]
>> +; SSE-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> +; SSE-NEXT: store i32 [[TMP15]], i32* @var, align 8
>> +; SSE-NEXT: ret i32 [[TMP14]]
>> ;
>> ; AVX-LABEL: @maxi8_mutiple_uses(
>> ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
>> @@ -713,34 +491,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>> ; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>> ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>> ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
>> -; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> +; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
>> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>> ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>> ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>> ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; AVX-NEXT: [[TMP17:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; AVX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
>> -; AVX-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
>> [[TMP15]]
>> -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
>> -; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]],
>> i32 [[TMP5]]
>> -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; AVX-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
>> -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]],
>> i32 [[TMP22]]
>> -; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> -; AVX-NEXT: store i32 [[TMP25]], i32* @var, align 8
>> -; AVX-NEXT: ret i32 [[TMP24]]
>> +; AVX-NEXT: [[TMP8:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
>> +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> [[TMP7]]
>> +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
>> +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]],
>> i32 [[TMP5]]
>> +; AVX-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> +; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
>> +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]],
>> i32 [[TMP12]]
>> +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> +; AVX-NEXT: store i32 [[TMP15]], i32* @var, align 8
>> +; AVX-NEXT: ret i32 [[TMP14]]
>> ;
>> ; AVX2-LABEL: @maxi8_mutiple_uses(
>> ; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
>> @@ -748,34 +516,24 @@ define i32 @maxi8_mutiple_uses(i32) {
>> ; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
>> ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>> ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32
>> undef
>> -; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> +; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>> ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>> ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>> ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; AVX2-NEXT: [[TMP17:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; AVX2-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
>> -; AVX2-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32
>> [[TMP15]]
>> -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
>> -; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]],
>> i32 [[TMP5]]
>> -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; AVX2-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
>> -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]],
>> i32 [[TMP22]]
>> -; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> -; AVX2-NEXT: store i32 [[TMP25]], i32* @var, align 8
>> -; AVX2-NEXT: ret i32 [[TMP24]]
>> +; AVX2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
>> +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> [[TMP7]]
>> +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
>> +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]],
>> i32 [[TMP5]]
>> +; AVX2-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> +; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
>> +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]],
>> i32 [[TMP12]]
>> +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
>> +; AVX2-NEXT: store i32 [[TMP15]], i32* @var, align 8
>> +; AVX2-NEXT: ret i32 [[TMP14]]
>> ;
>> ; SKX-LABEL: @maxi8_mutiple_uses(
>> ; SKX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x
>> i32]* @arr to <2 x i32>*), align 16
>> @@ -797,26 +555,16 @@ define i32 @maxi8_mutiple_uses(i32) {
>> ; SKX-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]]
>> ; SKX-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32>
>> [[TMP9]], <2 x i32> [[TMP11]]
>> ; SKX-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
>> -; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
>> -; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32
>> undef
>> -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
>> -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32
>> undef
>> -; SKX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
>> -; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32
>> undef
>> -; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
>> -; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> undef
>> -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP6]]
>> -; SKX-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
>> -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP14]]
>> -; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
>> i32 [[TMP14]]
>> -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> [[TMP6]]
>> -; SKX-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; SKX-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP27]]
>> -; SKX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[OP_EXTRA]],
>> i32 [[TMP27]]
>> -; SKX-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
>> -; SKX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 3, i32 4
>> -; SKX-NEXT: store i32 [[TMP31]], i32* @var, align 8
>> -; SKX-NEXT: ret i32 [[TMP29]]
>> +; SKX-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
>> +; SKX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
>> +; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]],
>> i32 [[TMP14]]
>> +; SKX-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> +; SKX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]]
>> +; SKX-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]],
>> i32 [[TMP17]]
>> +; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
>> +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4
>> +; SKX-NEXT: store i32 [[TMP21]], i32* @var, align 8
>> +; SKX-NEXT: ret i32 [[TMP19]]
>> ;
>> %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>> %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 1), align 4
>> @@ -854,33 +602,21 @@ define i32 @maxi8_wrong_parent(i32) {
>> ; SSE: pp:
>> ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>> ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
>> -; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> -; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
>> +; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> +; SSE-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
>> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>> ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>> ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>> ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>> ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; SSE-NEXT: [[TMP20:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; SSE-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
>> -; SSE-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> [[TMP15]]
>> -; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
>> -; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> [[TMP18]]
>> -; SSE-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
>> -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
>> i32 [[TMP5]]
>> -; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
>> [[TMP18]]
>> +; SSE-NEXT: [[TMP9:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
>> +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
>> [[TMP7]]
>> +; SSE-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
>> +; SSE-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
>> [[TMP8]]
>> +; SSE-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
>> +; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]],
>> i32 [[TMP5]]
>> ; SSE-NEXT: ret i32 [[OP_EXTRA]]
>> ;
>> ; AVX-LABEL: @maxi8_wrong_parent(
>> @@ -891,33 +627,21 @@ define i32 @maxi8_wrong_parent(i32) {
>> ; AVX: pp:
>> ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>> ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
>> -; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> -; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; AVX-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
>> +; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> +; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32
>> x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x
>> i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>> ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>> ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>> ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; AVX-NEXT: [[TMP20:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
>> -; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> [[TMP15]]
>> -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
>> -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> [[TMP18]]
>> -; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
>> -; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
>> i32 [[TMP5]]
>> -; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
>> [[TMP18]]
>> +; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
>> +; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
>> [[TMP7]]
>> +; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
>> +; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
>> [[TMP8]]
>> +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
>> +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]],
>> i32 [[TMP5]]
>> ; AVX-NEXT: ret i32 [[OP_EXTRA]]
>> ;
>> ; AVX2-LABEL: @maxi8_wrong_parent(
>> @@ -928,33 +652,21 @@ define i32 @maxi8_wrong_parent(i32) {
>> ; AVX2: pp:
>> ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32
>> [[TMP3]]
>> ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32*
>> getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x
>> i32>*), align 8
>> -; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
>> -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32
>> undef
>> -; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
>> -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32
>> undef
>> -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
>> -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32
>> undef
>> -; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32
>> undef
>> -; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> -; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
>> -; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32
>> [[TMP15]]
>> -; AVX2-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> -; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
>> +; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
>> +; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds
>> ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
>> ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]],
>> [[RDX_SHUF]]
>> ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
>> ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32
>> undef, i32 undef>
>> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
>> ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32>
>> [[RDX_SHUF1]]
>> -; AVX2-NEXT: [[TMP20:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> -; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
>> -; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> [[TMP15]]
>> -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
>> -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> [[TMP18]]
>> -; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
>> -; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]],
>> i32 [[TMP5]]
>> -; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32
>> [[TMP18]]
>> +; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT3]], i32 0
>> +; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
>> +; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32
>> [[TMP7]]
>> +; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
>> +; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32
>> [[TMP8]]
>> +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
>> +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]],
>> i32 [[TMP5]]
>> ; AVX2-NEXT: ret i32 [[OP_EXTRA]]
>> ;
>> ; SKX-LABEL: @maxi8_wrong_parent(
>> @@ -985,21 +697,9 @@ define i32 @maxi8_wrong_parent(i32) {
>> ; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32
>> [[TMP4]], i32 1
>> ; SKX-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32>
>> [[TMP16]], <2 x i32> [[TMP18]]
>> ; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
>> -; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
>> -; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32
>> undef
>> -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
>> -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32
>> undef
>> -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
>> -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32
>> undef
>> -; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
>> -; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32
>> undef
>> -; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP7]]
>> -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32
>> [[TMP7]]
>> -; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], [[TMP8]]
>> -; SKX-NEXT: [[TMP32:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
>> -; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP20]]
>> -; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP33]], i32 [[TMP32]],
>> i32 [[TMP20]]
>> -; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32
>> [[TMP8]]
>> +; SKX-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
>> +; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
>> +; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]],
>> i32 [[TMP20]]
>> ; SKX-NEXT: ret i32 [[OP_EXTRA]]
>> ;
>> %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]*
>> @arr, i64 0, i64 0), align 16
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -37,14 +37,11 @@ define i32 @add_red(float* %A, i32 %n) {
>> ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x
>> float>*
>> ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]],
>> align 4
>> ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float
>> 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
>> -; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT: [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
>> ; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
>> ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
>> ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
>> @@ -77,14 +74,11 @@ define i32 @add_red(float* %A, i32 %n) {
>> ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x
>> float>*
>> ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]],
>> align 4
>> ; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float
>> 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
>> -; STORE-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; STORE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT: [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
>> ; STORE-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
>> ; STORE-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
>> ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
>> @@ -178,14 +172,11 @@ define i32 @mul_red(float* noalias %A, f
>> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>> ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>> ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
>> -; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>> ; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
>> ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
>> ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>> @@ -223,14 +214,11 @@ define i32 @mul_red(float* noalias %A, f
>> ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>> ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>> ; STORE-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
>> -; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>> ; STORE-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
>> ; STORE-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
>> ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>> @@ -350,13 +338,6 @@ define i32 @long_red(float* noalias %A,
>> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x
>> float>*
>> ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
>> align 4
>> ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
>> -; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
>> -; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>> -; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>> -; CHECK-NEXT: [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
>> -; CHECK-NEXT: [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
>> -; CHECK-NEXT: [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
>> -; CHECK-NEXT: [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
>> ; CHECK-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
>> ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float,
>> float* [[A]], i64 [[ADD47]]
>> ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align
>> 4
>> @@ -369,7 +350,6 @@ define i32 @long_red(float* noalias %A,
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>> ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
>> -; CHECK-NEXT: [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
>> ; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
>> ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
>> ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
>> @@ -421,13 +401,6 @@ define i32 @long_red(float* noalias %A,
>> ; STORE-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x
>> float>*
>> ; STORE-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]],
>> align 4
>> ; STORE-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
>> -; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>> -; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>> -; STORE-NEXT: [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
>> -; STORE-NEXT: [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
>> -; STORE-NEXT: [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
>> -; STORE-NEXT: [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
>> ; STORE-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
>> ; STORE-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float,
>> float* [[A]], i64 [[ADD47]]
>> ; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align
>> 4
>> @@ -440,7 +413,6 @@ define i32 @long_red(float* noalias %A,
>> ; STORE-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; STORE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>> ; STORE-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
>> -; STORE-NEXT: [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
>> ; STORE-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
>> ; STORE-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
>> ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
>> @@ -576,16 +548,12 @@ define i32 @chain_red(float* noalias %A,
>> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>> ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>> ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
>> -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
>> -; CHECK-NEXT: [[ADD9:%.*]] = fadd fast float [[ADD]], undef
>> -; CHECK-NEXT: [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
>> -; CHECK-NEXT: [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
>> ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
>> ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>> ; CHECK-NEXT: br i1 [[EXITCOND]], label
>> [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
>> @@ -622,16 +590,12 @@ define i32 @chain_red(float* noalias %A,
>> ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>> ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>> ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
>> -; STORE-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
>> -; STORE-NEXT: [[ADD9:%.*]] = fadd fast float [[ADD]], undef
>> -; STORE-NEXT: [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> ; STORE-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
>> -; STORE-NEXT: [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
>> ; STORE-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
>> ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
>> ; STORE-NEXT: br i1 [[EXITCOND]], label
>> [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
>> @@ -1087,14 +1051,11 @@ define i32 @store_red(float* noalias %A,
>> ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x
>> float>*
>> ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]],
>> align 4
>> ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]]
>> -; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
>> ; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4
>> ; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float*
>> [[C_ADDR_038]], i64 1
>> ; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1
>> @@ -1169,14 +1130,11 @@ define void @float_red_example4(float* %
>> ; STORE-LABEL: @float_red_example4(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast
>> ([32 x float]* @arr_float to <4 x float>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]],
>> <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float>
>> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
>> ; STORE-NEXT: ret void
>> ;
>> @@ -1216,12 +1174,6 @@ define void @float_red_example8(float* %
>> ; STORE-LABEL: @float_red_example8(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast
>> ([32 x float]* @arr_float to <8 x float>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; STORE-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; STORE-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; STORE-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP0]],
>> <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1229,7 +1181,6 @@ define void @float_red_example8(float* %
>> ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float>
>> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[BIN_RDX4]],
>> i32 0
>> -; STORE-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
>> ; STORE-NEXT: ret void
>> ;
>> @@ -1293,20 +1244,6 @@ define void @float_red_example16(float*
>> ; STORE-LABEL: @float_red_example16(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast
>> ([32 x float]* @arr_float to <16 x float>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
>> -; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
>> -; STORE-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
>> -; STORE-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
>> -; STORE-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
>> -; STORE-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
>> -; STORE-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
>> -; STORE-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
>> -; STORE-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
>> -; STORE-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
>> -; STORE-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
>> -; STORE-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
>> -; STORE-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP0]],
>> <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32
>> 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float>
>> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1316,7 +1253,6 @@ define void @float_red_example16(float*
>> ; STORE-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float>
>> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]],
>> [[RDX_SHUF5]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <16 x float>
>> [[BIN_RDX6]], i32 0
>> -; STORE-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
>> ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
>> ; STORE-NEXT: ret void
>> ;
>> @@ -1372,14 +1308,11 @@ define void @i32_red_example4(i32* %res)
>> ; STORE-LABEL: @i32_red_example4(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <4 x i32>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP0]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; STORE-NEXT: [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> -; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>> ; STORE-NEXT: ret void
>> ;
>> @@ -1419,12 +1352,6 @@ define void @i32_red_example8(i32* %res)
>> ; STORE-LABEL: @i32_red_example8(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1432,7 +1359,6 @@ define void @i32_red_example8(i32* %res)
>> ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>> ; STORE-NEXT: ret void
>> ;
>> @@ -1496,20 +1422,6 @@ define void @i32_red_example16(i32* %res
>> ; STORE-LABEL: @i32_red_example16(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast
>> ([32 x i32]* @arr_i32 to <16 x i32>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> -; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> -; STORE-NEXT: [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
>> -; STORE-NEXT: [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
>> -; STORE-NEXT: [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
>> -; STORE-NEXT: [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
>> -; STORE-NEXT: [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
>> -; STORE-NEXT: [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
>> -; STORE-NEXT: [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP0]],
>> <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
>> i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32>
>> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1519,7 +1431,6 @@ define void @i32_red_example16(i32* %res
>> ; STORE-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32>
>> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]],
>> [[RDX_SHUF5]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[BIN_RDX6]],
>> i32 0
>> -; STORE-NEXT: [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
>> ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>> ; STORE-NEXT: ret void
>> ;
>> @@ -1631,36 +1542,6 @@ define void @i32_red_example32(i32* %res
>> ; STORE-LABEL: @i32_red_example32(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast
>> ([32 x i32]* @arr_i32 to <32 x i32>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> -; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> -; STORE-NEXT: [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
>> -; STORE-NEXT: [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
>> -; STORE-NEXT: [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
>> -; STORE-NEXT: [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
>> -; STORE-NEXT: [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
>> -; STORE-NEXT: [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
>> -; STORE-NEXT: [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
>> -; STORE-NEXT: [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
>> -; STORE-NEXT: [[ADD_15:%.*]] = add nsw i32 undef, [[ADD_14]]
>> -; STORE-NEXT: [[ADD_16:%.*]] = add nsw i32 undef, [[ADD_15]]
>> -; STORE-NEXT: [[ADD_17:%.*]] = add nsw i32 undef, [[ADD_16]]
>> -; STORE-NEXT: [[ADD_18:%.*]] = add nsw i32 undef, [[ADD_17]]
>> -; STORE-NEXT: [[ADD_19:%.*]] = add nsw i32 undef, [[ADD_18]]
>> -; STORE-NEXT: [[ADD_20:%.*]] = add nsw i32 undef, [[ADD_19]]
>> -; STORE-NEXT: [[ADD_21:%.*]] = add nsw i32 undef, [[ADD_20]]
>> -; STORE-NEXT: [[ADD_22:%.*]] = add nsw i32 undef, [[ADD_21]]
>> -; STORE-NEXT: [[ADD_23:%.*]] = add nsw i32 undef, [[ADD_22]]
>> -; STORE-NEXT: [[ADD_24:%.*]] = add nsw i32 undef, [[ADD_23]]
>> -; STORE-NEXT: [[ADD_25:%.*]] = add nsw i32 undef, [[ADD_24]]
>> -; STORE-NEXT: [[ADD_26:%.*]] = add nsw i32 undef, [[ADD_25]]
>> -; STORE-NEXT: [[ADD_27:%.*]] = add nsw i32 undef, [[ADD_26]]
>> -; STORE-NEXT: [[ADD_28:%.*]] = add nsw i32 undef, [[ADD_27]]
>> -; STORE-NEXT: [[ADD_29:%.*]] = add nsw i32 undef, [[ADD_28]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP0]],
>> <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <32 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32>
>> [[BIN_RDX]], <32 x i32> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11,
>> i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1672,7 +1553,6 @@ define void @i32_red_example32(i32* %res
>> ; STORE-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32>
>> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX8:%.*]] = add nsw <32 x i32> [[BIN_RDX6]],
>> [[RDX_SHUF7]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <32 x i32> [[BIN_RDX8]],
>> i32 0
>> -; STORE-NEXT: [[ADD_30:%.*]] = add nsw i32 undef, [[ADD_29]]
>> ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
>> ; STORE-NEXT: ret void
>> ;
>> @@ -1750,12 +1630,6 @@ define void @i32_red_call(i32 %val) {
>> ; CHECK-LABEL: @i32_red_call(
>> ; CHECK-NEXT: entry:
>> ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
>> -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1763,19 +1637,12 @@ define void @i32_red_call(i32 %val) {
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> ; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
>> ; CHECK-NEXT: ret void
>> ;
>> ; STORE-LABEL: @i32_red_call(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1783,7 +1650,6 @@ define void @i32_red_call(i32 %val) {
>> ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> ; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
>> ; STORE-NEXT: ret void
>> ;
>> @@ -1811,12 +1677,6 @@ define void @i32_red_invoke(i32 %val) pe
>> ; CHECK-LABEL: @i32_red_invoke(
>> ; CHECK-NEXT: entry:
>> ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
>> -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1824,7 +1684,6 @@ define void @i32_red_invoke(i32 %val) pe
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
>> ; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
>> ; CHECK: exception:
>> @@ -1837,12 +1696,6 @@ define void @i32_red_invoke(i32 %val) pe
>> ; STORE-LABEL: @i32_red_invoke(
>> ; STORE-NEXT: entry:
>> ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32
>> x i32]* @arr_i32 to <8 x i32>*), align 16
>> -; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
>> -; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
>> -; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
>> -; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
>> -; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
>> -; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
>> ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]],
>> [[RDX_SHUF]]
>> ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -1850,7 +1703,6 @@ define void @i32_red_invoke(i32 %val) pe
>> ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
>> ; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
>> ; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
>> ; STORE: exception:
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/long_chains.ll Mon Sep
>> 23 09:25:03 2019
>> @@ -12,10 +12,10 @@ define i32 @test(double* nocapture %A, i
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align
>> 1
>> ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 3, i8 3>
>> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
>> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> undef, i8
>> [[TMP3]], i32 0
>> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
>> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8
>> [[TMP5]], i32 1
>> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
>> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
>> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i8> undef, i8
>> [[TMP4]], i32 0
>> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP5]], i8
>> [[TMP3]], i32 1
>> ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double>
>> ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]
>> ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], <double
>> 1.000000e+00, double 1.000000e+00>
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
>> Mon Sep 23 09:25:03 2019
>> @@ -5,36 +5,6 @@ define signext i8 @Foo(<32 x i8>* %__v)
>> ; CHECK-LABEL: @Foo(
>> ; CHECK-NEXT: entry:
>> ; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]],
>> align 32
>> -; CHECK-NEXT: [[ADD_I_1_I:%.*]] = add i8 undef, undef
>> -; CHECK-NEXT: [[ADD_I_2_I:%.*]] = add i8 [[ADD_I_1_I]], undef
>> -; CHECK-NEXT: [[ADD_I_3_I:%.*]] = add i8 [[ADD_I_2_I]], undef
>> -; CHECK-NEXT: [[ADD_I_4_I:%.*]] = add i8 [[ADD_I_3_I]], undef
>> -; CHECK-NEXT: [[ADD_I_5_I:%.*]] = add i8 [[ADD_I_4_I]], undef
>> -; CHECK-NEXT: [[ADD_I_6_I:%.*]] = add i8 [[ADD_I_5_I]], undef
>> -; CHECK-NEXT: [[ADD_I_7_I:%.*]] = add i8 [[ADD_I_6_I]], undef
>> -; CHECK-NEXT: [[ADD_I_8_I:%.*]] = add i8 [[ADD_I_7_I]], undef
>> -; CHECK-NEXT: [[ADD_I_9_I:%.*]] = add i8 [[ADD_I_8_I]], undef
>> -; CHECK-NEXT: [[ADD_I_10_I:%.*]] = add i8 [[ADD_I_9_I]], undef
>> -; CHECK-NEXT: [[ADD_I_11_I:%.*]] = add i8 [[ADD_I_10_I]], undef
>> -; CHECK-NEXT: [[ADD_I_12_I:%.*]] = add i8 [[ADD_I_11_I]], undef
>> -; CHECK-NEXT: [[ADD_I_13_I:%.*]] = add i8 [[ADD_I_12_I]], undef
>> -; CHECK-NEXT: [[ADD_I_14_I:%.*]] = add i8 [[ADD_I_13_I]], undef
>> -; CHECK-NEXT: [[ADD_I_15_I:%.*]] = add i8 [[ADD_I_14_I]], undef
>> -; CHECK-NEXT: [[ADD_I_16_I:%.*]] = add i8 [[ADD_I_15_I]], undef
>> -; CHECK-NEXT: [[ADD_I_17_I:%.*]] = add i8 [[ADD_I_16_I]], undef
>> -; CHECK-NEXT: [[ADD_I_18_I:%.*]] = add i8 [[ADD_I_17_I]], undef
>> -; CHECK-NEXT: [[ADD_I_19_I:%.*]] = add i8 [[ADD_I_18_I]], undef
>> -; CHECK-NEXT: [[ADD_I_20_I:%.*]] = add i8 [[ADD_I_19_I]], undef
>> -; CHECK-NEXT: [[ADD_I_21_I:%.*]] = add i8 [[ADD_I_20_I]], undef
>> -; CHECK-NEXT: [[ADD_I_22_I:%.*]] = add i8 [[ADD_I_21_I]], undef
>> -; CHECK-NEXT: [[ADD_I_23_I:%.*]] = add i8 [[ADD_I_22_I]], undef
>> -; CHECK-NEXT: [[ADD_I_24_I:%.*]] = add i8 [[ADD_I_23_I]], undef
>> -; CHECK-NEXT: [[ADD_I_25_I:%.*]] = add i8 [[ADD_I_24_I]], undef
>> -; CHECK-NEXT: [[ADD_I_26_I:%.*]] = add i8 [[ADD_I_25_I]], undef
>> -; CHECK-NEXT: [[ADD_I_27_I:%.*]] = add i8 [[ADD_I_26_I]], undef
>> -; CHECK-NEXT: [[ADD_I_28_I:%.*]] = add i8 [[ADD_I_27_I]], undef
>> -; CHECK-NEXT: [[ADD_I_29_I:%.*]] = add i8 [[ADD_I_28_I]], undef
>> -; CHECK-NEXT: [[ADD_I_30_I:%.*]] = add i8 [[ADD_I_29_I]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i8> [[TMP0]],
>> <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32
>> 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30,
>> i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <32 x i8> [[TMP0]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i8>
>> [[BIN_RDX]], <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32
>> 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -46,7 +16,6 @@ define signext i8 @Foo(<32 x i8>* %__v)
>> ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i8>
>> [[BIN_RDX6]], <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
>> i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <32 x i8> [[BIN_RDX6]],
>> [[RDX_SHUF7]]
>> ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[BIN_RDX8]],
>> i32 0
>> -; CHECK-NEXT: [[ADD_I_31_I:%.*]] = add i8 [[ADD_I_30_I]], undef
>> ; CHECK-NEXT: ret i8 [[TMP1]]
>> ;
>> entry:
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_loads.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -35,13 +35,6 @@ define i32 @test(i32* nocapture readonly
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], <i32 42, i32 42,
>> i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
>> -; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
>> -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
>> -; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
>> -; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
>> -; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -50,7 +43,6 @@ define i32 @test(i32* nocapture readonly
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]]
>> -; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>> ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>> ; CHECK: for.end:
>> ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
>> @@ -138,13 +130,6 @@ define i32 @test2(i32* nocapture readonl
>> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
>> ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]],
>> align 4
>> ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
>> -; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
>> -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
>> -; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
>> -; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
>> -; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -153,7 +138,6 @@ define i32 @test2(i32* nocapture readonl
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
>> -; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>> ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>> ; CHECK: for.end:
>> ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
>> @@ -258,13 +242,6 @@ define i32 @test3(i32* nocapture readonl
>> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
>> ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]],
>> align 4
>> ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]],
>> [[TMP3]]
>> -; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
>> -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
>> -; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
>> -; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
>> -; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
>> -; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
>> -; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -273,7 +250,6 @@ define i32 @test3(i32* nocapture readonl
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
>> -; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
>> ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
>> ; CHECK: for.end:
>> ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>> Mon Sep 23 09:25:03 2019
>> @@ -26,12 +26,6 @@ define i32 @test_add(i32* nocapture read
>> ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
>> [[P]], i64 7
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT: [[MUL_18:%.*]] = add i32 undef, undef
>> -; CHECK-NEXT: [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
>> -; CHECK-NEXT: [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
>> -; CHECK-NEXT: [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
>> -; CHECK-NEXT: [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
>> -; CHECK-NEXT: [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -39,7 +33,6 @@ define i32 @test_add(i32* nocapture read
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
>> ; CHECK-NEXT: ret i32 [[TMP2]]
>> ;
>> entry:
>> @@ -147,12 +140,6 @@ define i32 @test_and(i32* nocapture read
>> ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
>> [[P]], i64 7
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT: [[MUL_18:%.*]] = and i32 undef, undef
>> -; CHECK-NEXT: [[MUL_29:%.*]] = and i32 undef, [[MUL_18]]
>> -; CHECK-NEXT: [[MUL_310:%.*]] = and i32 undef, [[MUL_29]]
>> -; CHECK-NEXT: [[MUL_411:%.*]] = and i32 undef, [[MUL_310]]
>> -; CHECK-NEXT: [[MUL_512:%.*]] = and i32 undef, [[MUL_411]]
>> -; CHECK-NEXT: [[MUL_613:%.*]] = and i32 undef, [[MUL_512]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -160,7 +147,6 @@ define i32 @test_and(i32* nocapture read
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[MUL_714:%.*]] = and i32 undef, [[MUL_613]]
>> ; CHECK-NEXT: ret i32 [[TMP2]]
>> ;
>> entry:
>> @@ -208,12 +194,6 @@ define i32 @test_or(i32* nocapture reado
>> ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
>> [[P]], i64 7
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT: [[MUL_18:%.*]] = or i32 undef, undef
>> -; CHECK-NEXT: [[MUL_29:%.*]] = or i32 undef, [[MUL_18]]
>> -; CHECK-NEXT: [[MUL_310:%.*]] = or i32 undef, [[MUL_29]]
>> -; CHECK-NEXT: [[MUL_411:%.*]] = or i32 undef, [[MUL_310]]
>> -; CHECK-NEXT: [[MUL_512:%.*]] = or i32 undef, [[MUL_411]]
>> -; CHECK-NEXT: [[MUL_613:%.*]] = or i32 undef, [[MUL_512]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -221,7 +201,6 @@ define i32 @test_or(i32* nocapture reado
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[MUL_714:%.*]] = or i32 undef, [[MUL_613]]
>> ; CHECK-NEXT: ret i32 [[TMP2]]
>> ;
>> entry:
>> @@ -269,12 +248,6 @@ define i32 @test_xor(i32* nocapture read
>> ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32*
>> [[P]], i64 7
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT: [[MUL_18:%.*]] = xor i32 undef, undef
>> -; CHECK-NEXT: [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]]
>> -; CHECK-NEXT: [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]]
>> -; CHECK-NEXT: [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]]
>> -; CHECK-NEXT: [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]]
>> -; CHECK-NEXT: [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> @@ -282,7 +255,6 @@ define i32 @test_xor(i32* nocapture read
>> ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32>
>> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef, i32 undef, i32 undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]],
>> [[RDX_SHUF3]]
>> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]],
>> i32 0
>> -; CHECK-NEXT: [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]]
>> ; CHECK-NEXT: ret i32 [[TMP2]]
>> ;
>> entry:
>> @@ -322,15 +294,12 @@ define i32 @PR37731(<4 x i32>* noalias n
>> ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP4]], <i32 18, i32 2,
>> i32 7, i32 13>
>> ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]]
>> ; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16
>> -; CHECK-NEXT: [[TMP7:%.*]] = xor i32 undef, undef
>> -; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <4 x i32> [[TMP6]], [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32>
>> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32
>> undef>
>> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = xor <4 x i32> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> -; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], undef
>> -; CHECK-NEXT: ret i32 [[TMP9]]
>> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> +; CHECK-NEXT: ret i32 [[TMP7]]
>> ;
>> entry:
>> %0 = load <4 x i32>, <4 x i32>* %self, align 16
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -33,11 +33,8 @@ define i32 @foo(i32* %diff) #0 {
>> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
>> ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]],
>> align 4
>> ; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP9]]
>> -; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 undef, [[A_088]]
>> ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x
>> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1
>> -; CHECK-NEXT: [[ADD24:%.*]] = add nsw i32 [[ADD10]], undef
>> ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x
>> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2
>> -; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD24]], undef
>> ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x
>> i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3
>> ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
>> ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align
>> 16
>> @@ -47,7 +44,6 @@ define i32 @foo(i32* %diff) #0 {
>> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]],
>> [[RDX_SHUF1]]
>> ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]],
>> i32 0
>> ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]]
>> -; CHECK-NEXT: [[ADD52:%.*]] = add nsw i32 [[ADD38]], undef
>> ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
>> ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
>> ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label
>> [[FOR_BODY]]
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>> (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
>> Mon Sep 23 09:25:03 2019
>> @@ -19,11 +19,6 @@ define void @hoge() {
>> ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
>> ; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2
>> x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
>> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]], <i32 undef,
>> i32 15, i32 31, i32 47>
>> -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32
>> undef
>> -; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef
>> -; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32
>> undef
>> -; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], undef
>> ; CHECK-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <4 x i32> [[TMP5]],
>> <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt <4 x i32> [[TMP5]],
>> [[RDX_SHUF9]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP10]], <4 x i32> [[TMP5]], <4 x i32> [[RDX_SHUF9]]
>> @@ -31,28 +26,12 @@ define void @hoge() {
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP13:%.*]] = icmp sgt <4 x i32>
>> [[RDX_MINMAX_SELECT11]], [[RDX_SHUF12]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT14:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP13]], <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32>
>> [[RDX_SHUF12]]
>> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32>
>> [[RDX_MINMAX_SELECT14]], i32 0
>> -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32
>> undef
>> ; CHECK-NEXT: [[TMP19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef
>> ; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], 63
>> ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]]
>> ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
>> ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2
>> x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
>> ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49,
>> i32 -33, i32 -33, i32 -17>
>> -; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32
>> undef
>> -; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef
>> -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 undef, i32
>> [[TMP27]]
>> -; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 undef, i32
>> undef
>> -; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP29]]
>> -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP29]], i32
>> [[TMP32]]
>> -; CHECK-NEXT: [[TMP36:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 undef, i32
>> undef
>> -; CHECK-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP37]], [[TMP34]]
>> -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP34]], i32
>> [[TMP37]]
>> -; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 undef, undef
>> -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 undef, i32
>> undef
>> -; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], [[TMP39]]
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP9]], <4
>> x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP9]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1>
>> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP9]], <4 x i32> [[RDX_SHUF]]
>> @@ -70,7 +49,6 @@ define void @hoge() {
>> ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = select i1 [[TMP14]], i32
>> [[OP_EXTRA5]], i32 undef
>> ; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef
>> ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP15]], i32
>> [[OP_EXTRA6]], i32 undef
>> -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP39]], i32
>> [[TMP42]]
>> ; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA7]]
>> ; CHECK-NEXT: unreachable
>> ;
>>
>> Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll (original)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/undef_vect.ll Mon Sep 23
>> 09:25:03 2019
>> @@ -16,15 +16,6 @@ define void @_Z2azv() local_unnamed_addr
>> ; CHECK-NEXT: [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds
>> %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76",
>> %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1
>> ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x
>> i32>*
>> ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]],
>> align 4
>> -; CHECK-NEXT: [[CMP_I1_4:%.*]] = icmp slt i32 undef, undef
>> -; CHECK-NEXT: [[DOTSROA_SPECULATED_4:%.*]] = select i1 [[CMP_I1_4]],
>> i32 undef, i32 undef
>> -; CHECK-NEXT: [[CMP_I1_5:%.*]] = icmp slt i32
>> [[DOTSROA_SPECULATED_4]], undef
>> -; CHECK-NEXT: [[DOTSROA_SPECULATED_5:%.*]] = select i1 [[CMP_I1_5]],
>> i32 undef, i32 [[DOTSROA_SPECULATED_4]]
>> -; CHECK-NEXT: [[CMP_I1_6:%.*]] = icmp slt i32
>> [[DOTSROA_SPECULATED_5]], undef
>> -; CHECK-NEXT: [[DOTSROA_SPECULATED_6:%.*]] = select i1 [[CMP_I1_6]],
>> i32 undef, i32 [[DOTSROA_SPECULATED_5]]
>> -; CHECK-NEXT: [[CMP_I1_7:%.*]] = icmp slt i32
>> [[DOTSROA_SPECULATED_6]], undef
>> -; CHECK-NEXT: [[DOTSROA_SPECULATED_7:%.*]] = select i1 [[CMP_I1_7]],
>> i32 undef, i32 [[DOTSROA_SPECULATED_6]]
>> -; CHECK-NEXT: [[CMP_I1_8:%.*]] = icmp slt i32 undef, undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8
>> x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,
>> i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP1]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP1]], <8 x i32> [[RDX_SHUF]]
>> @@ -39,7 +30,6 @@ define void @_Z2azv() local_unnamed_addr
>> ; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]],
>> i32 undef
>> ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef
>> ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP4]], i32
>> [[OP_EXTRA]], i32 undef
>> -; CHECK-NEXT: [[DOTSROA_SPECULATED_8:%.*]] = select i1 [[CMP_I1_8]],
>> i32 undef, i32 undef
>> ; CHECK-NEXT: [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32
>> undef, i32 [[OP_EXTRA7]]
>> ; CHECK-NEXT: [[CMP_I1_10:%.*]] = icmp slt i32
>> [[DOTSROA_SPECULATED_9]], undef
>> ; CHECK-NEXT: ret void
>>
>> Modified:
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll?rev=372626&r1=372625&r2=372626&view=diff
>>
>> ==============================================================================
>> ---
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
>> (original)
>> +++
>> llvm/trunk/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll Mon
>> Sep 23 09:25:03 2019
>> @@ -18,19 +18,6 @@ define i32 @foo(i32* nocapture readonly
>> ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
>> [[A7:%.*]], i32 6
>> ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
>> [[A8:%.*]], i32 7
>> ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
>> -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef
>> -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
>> -; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
>> -; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32
>> undef
>> -; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
>> -; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]],
>> i32 undef
>> -; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
>> -; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]],
>> i32 undef
>> -; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
>> -; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]],
>> i32 undef
>> -; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
>> -; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]],
>> i32 undef
>> -; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]],
>> <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
>> @@ -41,7 +28,6 @@ define i32 @foo(i32* nocapture readonly
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
>> [[RDX_SHUF4]]
>> ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
>> ; CHECK-NEXT: ret i32 [[TMP11]]
>> ;
>> entry:
>> @@ -92,19 +78,6 @@ define i32 @foo1(i32* nocapture readonly
>> ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
>> [[A7:%.*]], i32 6
>> ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
>> [[A8:%.*]], i32 7
>> ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
>> -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef
>> -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
>> -; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
>> -; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef
>> -; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
>> -; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef
>> -; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
>> -; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef
>> -; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
>> -; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef
>> -; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
>> -; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef
>> -; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]],
>> <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
>> @@ -115,7 +88,6 @@ define i32 @foo1(i32* nocapture readonly
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
>> [[RDX_SHUF4]]
>> ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
>> ; CHECK-NEXT: ret i32 [[TMP11]]
>> ;
>> entry:
>> @@ -170,19 +142,6 @@ define i32 @foo2(i32* nocapture readonly
>> ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32
>> [[A7:%.*]], i32 6
>> ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32
>> [[A8:%.*]], i32 7
>> ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
>> -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef
>> -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
>> -; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
>> -; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef
>> -; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
>> -; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef
>> -; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
>> -; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef
>> -; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
>> -; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef
>> -; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
>> -; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef
>> -; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
>> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]],
>> <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32
>> undef, i32 undef, i32 undef>
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]],
>> [[RDX_SHUF]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
>> @@ -193,7 +152,6 @@ define i32 @foo2(i32* nocapture readonly
>> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32>
>> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
>> ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1>
>> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32>
>> [[RDX_SHUF4]]
>> ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32>
>> [[RDX_MINMAX_SELECT6]], i32 0
>> -; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
>> ; CHECK-NEXT: ret i32 [[TMP11]]
>> ;
>> entry:
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190926/49c691a4/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 4849 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190926/49c691a4/attachment-0001.bin>
More information about the llvm-commits mailing list