[llvm-branch-commits] [llvm] [LV] Use VPReductionRecipe for partial reductions (PR #147513)
Sam Tebbs via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Oct 20 07:01:37 PDT 2025
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/147513
>From 9130747cfca201ab92142805f7c593fcec260c50 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 18 Jun 2025 15:50:31 +0100
Subject: [PATCH 1/2] [LV] Use VPReductionRecipe for partial reductions
Partial reductions can easily be represented by the VPReductionRecipe
class by setting their scale factor to something greater than 1. This PR
merges the two together and gives VPReductionRecipe a VFScaleFactor so
that it can choose to generate the partial reduction intrinsic at
execute time.
Depends on https://github.com/llvm/llvm-project/pull/144281
---
.../Transforms/Vectorize/LoopVectorize.cpp | 33 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 135 ++--
.../Transforms/Vectorize/VPlanAnalysis.cpp | 19 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 147 +---
.../Transforms/Vectorize/VPlanTransforms.cpp | 6 +-
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 -
.../partial-reduce-dot-product-epilogue.ll | 246 ++----
.../partial-reduce-dot-product-neon.ll | 726 ++++++------------
.../AArch64/partial-reduce-dot-product.ll | 2 +-
.../LoopVectorize/AArch64/vplan-printing.ll | 2 +-
.../Transforms/Vectorize/VPlanTest.cpp | 8 +-
12 files changed, 442 insertions(+), 885 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cee08ef94aeb5..d11342f41c13a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6937,7 +6937,8 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
}
// The VPlan-based cost model is more accurate for partial reduction and
// comparing against the legacy cost isn't desirable.
- if (isa<VPPartialReductionRecipe>(&R))
+ if (auto *VPR = dyn_cast<VPReductionRecipe>(&R);
+ VPR && VPR->isPartialReduction())
return true;
// The VPlan-based cost model can analyze if recipes are scalar
@@ -8056,11 +8057,15 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
// If the PHI is used by a partial reduction, set the scale factor.
+ bool UseInLoopReduction = CM.isInLoopReduction(Phi);
+ bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
unsigned ScaleFactor =
- getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
- PhiRecipe = new VPReductionPHIRecipe(
- Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
- CM.useOrderedReductions(RdxDesc), ScaleFactor);
+ (UseOrderedReductions || UseInLoopReduction)
+ ? 0
+ : getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
+ PhiRecipe =
+ new VPReductionPHIRecipe(Phi, RdxDesc.getRecurrenceKind(), *StartV,
+ UseOrderedReductions, ScaleFactor);
} else {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
@@ -8128,7 +8133,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
VPValue *Accumulator = Operands[1];
VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe();
if (isa<VPReductionPHIRecipe>(BinOpRecipe) ||
- isa<VPPartialReductionRecipe>(BinOpRecipe))
+ (isa<VPReductionRecipe>(BinOpRecipe) &&
+ cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction()))
std::swap(BinOp, Accumulator);
if (ScaleFactor !=
@@ -8153,12 +8159,10 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
Cond = getBlockInMask(Builder.getInsertBlock());
- VPValue *Zero =
- Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
- BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
}
- return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
- ScaleFactor, Reduction);
+
+ return new VPReductionRecipe(RecurKind::Add, FastMathFlags(), Reduction,
+ Accumulator, BinOp, Cond, false, ScaleFactor);
}
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -8669,9 +8673,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
: FastMathFlags();
+ bool UseOrderedReductions = PhiR->isOrdered();
+ unsigned VFScaleFactor = !UseOrderedReductions;
auto *RedRecipe = new VPReductionRecipe(
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
- PhiR->isOrdered(), CurrentLinkI->getDebugLoc());
+ UseOrderedReductions, VFScaleFactor, CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
@@ -8706,8 +8712,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Don't output selects for partial reductions because they have an output
// with fewer lanes than the VF. So the operands of the select would have
// different numbers of lanes. Partial reductions mask the input instead.
+ auto *RR = dyn_cast<VPReductionRecipe>(OrigExitingVPV->getDefiningRecipe());
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
- !isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
+ (!RR || !RR->isPartialReduction())) {
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
std::optional<FastMathFlags> FMFs =
PhiTy->isFloatingPointTy()
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e369c2abc5671..668c7a5750398 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -558,7 +558,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
- case VPRecipeBase::VPPartialReductionSC:
return true;
case VPRecipeBase::VPBranchOnMaskSC:
case VPRecipeBase::VPInterleaveEVLSC:
@@ -2333,24 +2332,27 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// The recurrence kind of the reduction.
const RecurKind Kind;
- /// The phi is part of an in-loop reduction.
- bool IsInLoop;
-
/// The phi is part of an ordered reduction. Requires IsInLoop to be true.
bool IsOrdered;
- /// When expanding the reduction PHI, the plan's VF element count is divided
- /// by this factor to form the reduction phi's VF.
- unsigned VFScaleFactor = 1;
+ /// The scaling factor, relative to the VF, that this recipe's output is
+ /// divided by.
+ /// For outer-loop reductions this is equal to 1.
+ /// For in-loop reductions this is equal to 0, to specify that this is equal
+ /// to the VF (which may not be known yet). For partial-reductions this is
+ /// equal to another scalar value.
+ unsigned VFScaleFactor;
public:
/// Create a new VPReductionPHIRecipe for the reduction \p Phi.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
- bool IsInLoop = false, bool IsOrdered = false,
- unsigned VFScaleFactor = 1)
+ bool IsOrdered = false, unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
- IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
- assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
+ IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
+ assert((!IsOrdered || isInLoop()) &&
+ "IsOrdered requires the reduction to be in-loop");
+ assert(((!isInLoop() && !IsOrdered) || isInLoop()) &&
+ "Invalid VFScaleFactor");
}
~VPReductionPHIRecipe() override = default;
@@ -2358,7 +2360,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPReductionPHIRecipe *clone() override {
auto *R = new VPReductionPHIRecipe(
dyn_cast_or_null<PHINode>(getUnderlyingValue()), getRecurrenceKind(),
- *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
+ *getOperand(0), IsOrdered, VFScaleFactor);
R->addOperand(getBackedgeValue());
return R;
}
@@ -2388,8 +2390,11 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Returns true, if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }
- /// Returns true, if the phi is part of an in-loop reduction.
- bool isInLoop() const { return IsInLoop; }
+ /// Returns true if the phi is part of an in-loop reduction.
+ bool isInLoop() const { return VFScaleFactor == 0; }
+
+ /// Returns true if the reduction outputs a vector with a scaled down VF.
+ bool isPartialReduction() const { return VFScaleFactor > 1; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
@@ -2661,23 +2666,32 @@ class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
}
};
-/// A recipe to represent inloop reduction operations, performing a reduction on
-/// a vector operand into a scalar value, and adding the result to a chain.
-/// The Operands are {ChainOp, VecOp, [Condition]}.
+/// A recipe to represent inloop, ordered or partial reduction operations. It
+/// performs a reduction on a vector operand into a scalar (vector in the case
+/// of a partial reduction) value, and adds the result to a chain. The Operands
+/// are {ChainOp, VecOp, [Condition]}.
class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
/// The recurrence kind for the reduction in question.
RecurKind RdxKind;
bool IsOrdered;
/// Whether the reduction is conditional.
bool IsConditional = false;
+ /// The scaling factor, relative to the VF, that this recipe's output is
+ /// divided by.
+ /// For outer-loop reductions this is equal to 1.
+ /// For in-loop reductions this is equal to 0, to specify that this is equal
+ /// to the VF (which may not be known yet).
+ /// For partial-reductions this is equal to another scalar value.
+ unsigned VFScaleFactor;
protected:
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
FastMathFlags FMFs, Instruction *I,
ArrayRef<VPValue *> Operands, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL)
+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
- IsOrdered(IsOrdered) {
+ IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
+ assert((!IsOrdered || VFScaleFactor == 0) && "Invalid scale factor");
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
@@ -2688,30 +2702,31 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
public:
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
+ bool IsOrdered, unsigned VFScaleFactor,
+ DebugLoc DL = DebugLoc::getUnknown())
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered, DL) {}
+ IsOrdered, VFScaleFactor, DL) {}
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
+ bool IsOrdered, unsigned VFScaleFactor,
+ DebugLoc DL = DebugLoc::getUnknown())
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered, DL) {}
+ IsOrdered, VFScaleFactor, DL) {}
~VPReductionRecipe() override = default;
VPReductionRecipe *clone() override {
- return new VPReductionRecipe(RdxKind, getFastMathFlags(),
- getUnderlyingInstr(), getChainOp(), getVecOp(),
- getCondOp(), IsOrdered, getDebugLoc());
+ return new VPReductionRecipe(
+ RdxKind, getFastMathFlags(), getUnderlyingInstr(), getChainOp(),
+ getVecOp(), getCondOp(), IsOrdered, VFScaleFactor, getDebugLoc());
}
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
- R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
- R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
+ R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
}
static inline bool classof(const VPUser *U) {
@@ -2738,6 +2753,8 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
bool isOrdered() const { return IsOrdered; };
/// Return true if the in-loop reduction is conditional.
bool isConditional() const { return IsConditional; };
+ /// Returns true if the reduction outputs a vector with a scaled down VF.
+ bool isPartialReduction() const { return VFScaleFactor > 1; }
/// The VPValue of the scalar Chain being accumulated.
VPValue *getChainOp() const { return getOperand(0); }
/// The VPValue of the vector value to be reduced.
@@ -2746,68 +2763,8 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
VPValue *getCondOp() const {
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
}
-};
-
-/// A recipe for forming partial reductions. In the loop, an accumulator and
-/// vector operand are added together and passed to the next iteration as the
-/// next accumulator. After the loop body, the accumulator is reduced to a
-/// scalar value.
-class VPPartialReductionRecipe : public VPReductionRecipe {
- unsigned Opcode;
-
- /// The divisor by which the VF of this recipe's output should be divided
- /// during execution.
- unsigned VFScaleFactor;
-
-public:
- VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
- VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
- : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
- VFScaleFactor, ReductionInst) {}
- VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
- VPValue *Cond, unsigned ScaleFactor,
- Instruction *ReductionInst = nullptr)
- : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
- FastMathFlags(), ReductionInst,
- ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
- Opcode(Opcode), VFScaleFactor(ScaleFactor) {
- [[maybe_unused]] auto *AccumulatorRecipe =
- getChainOp()->getDefiningRecipe();
- // When cloning as part of a VPExpressionRecipe the chain op could have
- // replaced by a temporary VPValue, so it doesn't have a defining recipe.
- assert((!AccumulatorRecipe ||
- isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
- isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
- "Unexpected operand order for partial reduction recipe");
- }
- ~VPPartialReductionRecipe() override = default;
-
- VPPartialReductionRecipe *clone() override {
- return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
- getCondOp(), VFScaleFactor,
- getUnderlyingInstr());
- }
-
- VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
-
- /// Generate the reduction in the loop.
- void execute(VPTransformState &State) override;
-
- /// Return the cost of this VPPartialReductionRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override;
-
- /// Get the binary op's opcode.
- unsigned getOpcode() const { return Opcode; }
-
/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
};
/// A recipe to represent inloop reduction operations with vector-predication
@@ -2823,7 +2780,7 @@ class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
- R.isOrdered(), DL) {}
+ R.isOrdered(), 0, DL) {}
~VPReductionEVLRecipe() override = default;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 07bfe7a896d86..7d94312b9f41b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -286,10 +286,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
- VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
- VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
- return inferScalarType(R->getOperand(0));
- })
+ VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe>(
+ [this](const VPRecipeBase *R) {
+ return inferScalarType(R->getOperand(0));
+ })
// VPInstructionWithType must be handled before VPInstruction.
.Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
VPWidenCastRecipe>(
@@ -559,11 +559,12 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
// fewer lanes than the VF.
unsigned ScaleFactor =
vputils::getVFScaleFactor(VPV->getDefiningRecipe());
- ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
- LLVM_DEBUG(if (VF != VFs[J]) {
- dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
- << " for " << *R << "\n";
- });
+ ElementCount VF = VFs[J];
+ if (ScaleFactor > 1) {
+ VF = VFs[J].divideCoefficientBy(ScaleFactor);
+ LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
+ << " to " << VF << " for " << *R << "\n";);
+ }
Type *ScalarTy = TypeInfo.inferScalarType(VPV);
unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c90f4d6daf019..37da56f9b24b9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -168,7 +168,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
case VPBlendSC:
case VPReductionEVLSC:
- case VPPartialReductionSC:
case VPReductionSC:
case VPScalarIVStepsSC:
case VPVectorPointerSC:
@@ -297,110 +296,6 @@ bool VPRecipeBase::isScalarCast() const {
return VPI && Instruction::isCast(VPI->getOpcode());
}
-InstructionCost
-VPPartialReductionRecipe::computeCost(ElementCount VF,
- VPCostContext &Ctx) const {
- std::optional<unsigned> Opcode;
- VPValue *Op = getOperand(1);
- VPRecipeBase *OpR = Op->getDefiningRecipe();
-
- // If the partial reduction is predicated, a select will be operand 1
- if (match(Op, m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
- OpR = Op->getDefiningRecipe();
- }
-
- Type *InputTypeA = nullptr, *InputTypeB = nullptr;
- TTI::PartialReductionExtendKind ExtAType = TTI::PR_None,
- ExtBType = TTI::PR_None;
-
- auto GetExtendKind = [](VPRecipeBase *R) {
- if (!R)
- return TTI::PR_None;
- auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
- if (!WidenCastR)
- return TTI::PR_None;
- if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
- return TTI::PR_ZeroExtend;
- if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
- return TTI::PR_SignExtend;
- return TTI::PR_None;
- };
-
- // Pick out opcode, type/ext information and use sub side effects from a widen
- // recipe.
- auto HandleWiden = [&](VPWidenRecipe *Widen) {
- if (match(Widen, m_Sub(m_ZeroInt(), m_VPValue(Op)))) {
- Widen = dyn_cast<VPWidenRecipe>(Op->getDefiningRecipe());
- }
- Opcode = Widen->getOpcode();
- VPRecipeBase *ExtAR = Widen->getOperand(0)->getDefiningRecipe();
- VPRecipeBase *ExtBR = Widen->getOperand(1)->getDefiningRecipe();
- InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
- : Widen->getOperand(0));
- InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
- : Widen->getOperand(1));
- ExtAType = GetExtendKind(ExtAR);
- ExtBType = GetExtendKind(ExtBR);
-
- if (!ExtBR && Widen->getOperand(1)->isLiveIn()) {
- auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue());
- if (canConstantBeExtended(CI, InputTypeA, ExtAType)) {
- InputTypeB = InputTypeA;
- ExtBType = ExtAType;
- }
- }
- };
-
- if (isa<VPWidenCastRecipe>(OpR)) {
- InputTypeA = Ctx.Types.inferScalarType(OpR->getOperand(0));
- ExtAType = GetExtendKind(OpR);
- } else if (isa<VPReductionPHIRecipe>(OpR)) {
- auto RedPhiOp1R = getOperand(1)->getDefiningRecipe();
- if (isa<VPWidenCastRecipe>(RedPhiOp1R)) {
- InputTypeA = Ctx.Types.inferScalarType(RedPhiOp1R->getOperand(0));
- ExtAType = GetExtendKind(RedPhiOp1R);
- } else if (auto Widen = dyn_cast<VPWidenRecipe>(RedPhiOp1R))
- HandleWiden(Widen);
- } else if (auto Widen = dyn_cast<VPWidenRecipe>(OpR)) {
- HandleWiden(Widen);
- } else if (auto Reduction = dyn_cast<VPPartialReductionRecipe>(OpR)) {
- return Reduction->computeCost(VF, Ctx);
- }
- auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
- return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
- PhiType, VF, ExtAType, ExtBType,
- Opcode, Ctx.CostKind);
-}
-
-void VPPartialReductionRecipe::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
-
- assert(getOpcode() == Instruction::Add &&
- "Unhandled partial reduction opcode");
-
- Value *BinOpVal = State.get(getOperand(1));
- Value *PhiVal = State.get(getOperand(0));
- assert(PhiVal && BinOpVal && "Phi and Mul must be set");
-
- Type *RetTy = PhiVal->getType();
-
- CallInst *V =
- Builder.CreateIntrinsic(RetTy, Intrinsic::vector_partial_reduce_add,
- {PhiVal, BinOpVal}, nullptr, "partial.reduce");
-
- State.set(this, V);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPPartialReductionRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "PARTIAL-REDUCE ";
- printAsOperand(O, SlotTracker);
- O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
- printOperands(O, SlotTracker);
-}
-#endif
-
void VPIRFlags::intersectFlags(const VPIRFlags &Other) {
assert(OpType == Other.OpType && "OpType must match");
switch (OpType) {
@@ -2651,7 +2546,6 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "Reduction being replicated.");
- Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
RecurKind Kind = getRecurrenceKind();
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
"In-loop AnyOf reductions aren't currently supported");
@@ -2674,6 +2568,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
Value *NewRed;
Value *NextInChain;
if (IsOrdered) {
+ Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
if (State.VF.isVector())
NewRed =
createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
@@ -2683,8 +2578,16 @@ void VPReductionRecipe::execute(VPTransformState &State) {
PrevInChain, NewVecOp);
PrevInChain = NewRed;
NextInChain = NewRed;
+ } else if (isPartialReduction()) {
+ assert(Kind == RecurKind::Add && "Unexpected partial reduction kind");
+ Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ false);
+ NewRed = State.Builder.CreateIntrinsic(
+ PrevInChain->getType(), Intrinsic::vector_partial_reduce_add,
+ {PrevInChain, NewVecOp}, nullptr, "partial.reduce");
+ PrevInChain = NewRed;
+ NextInChain = NewRed;
} else {
- PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
+ Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
@@ -2693,7 +2596,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
PrevInChain, NewRed);
}
- State.set(this, NextInChain, /*IsScalar*/ true);
+ State.set(this, NextInChain, /*IsScalar*/ !isPartialReduction());
}
void VPReductionEVLRecipe::execute(VPTransformState &State) {
@@ -2740,6 +2643,12 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
std::optional<FastMathFlags> OptionalFMF =
ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt;
+ if (isPartialReduction())
+ return Ctx.TTI.getPartialReductionCost(
+ Opcode, ElementTy, ElementTy, ElementTy, VF,
+ TargetTransformInfo::PR_None, TargetTransformInfo::PR_None,
+ std::nullopt, Ctx.CostKind);
+
// TODO: Support any-of reductions.
assert(
(!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||
@@ -2845,7 +2754,8 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
unsigned Opcode = RecurrenceDescriptor::getOpcode(
cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind());
auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
- if (isa<VPPartialReductionRecipe>(ExpressionRecipes.back())) {
+ if (cast<VPReductionRecipe>(ExpressionRecipes.back())
+ ->isPartialReduction()) {
return Ctx.TTI.getPartialReductionCost(
Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr, RedTy, VF,
TargetTransformInfo::getPartialReductionExtendKind(ExtR->getOpcode()),
@@ -2864,7 +2774,8 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
Opcode = Instruction::Sub;
LLVM_FALLTHROUGH;
case ExpressionTypes::ExtMulAccReduction: {
- if (isa<VPPartialReductionRecipe>(ExpressionRecipes.back())) {
+ auto *RedR = cast<VPReductionRecipe>(ExpressionRecipes.back());
+ if (RedR->isPartialReduction()) {
auto *Ext0R = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
auto *Ext1R = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
@@ -2909,12 +2820,11 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back());
unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
- bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red);
switch (ExpressionType) {
case ExpressionTypes::ExtendedReduction: {
getOperand(1)->printAsOperand(O, SlotTracker);
- O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
+ O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(Opcode) << " (";
getOperand(0)->printAsOperand(O, SlotTracker);
Red->printFlags(O);
@@ -2931,7 +2841,7 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
}
case ExpressionTypes::ExtNegatedMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
- O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
+ O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
<< " (sub (0, mul";
@@ -2956,7 +2866,7 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
case ExpressionTypes::MulAccReduction:
case ExpressionTypes::ExtMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
- O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
+ O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
<< " (";
@@ -2993,7 +2903,10 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << Indent << "REDUCE ";
+ if (isPartialReduction())
+ O << Indent << "PARTIAL-REDUCE ";
+ else
+ O << Indent << "REDUCE ";
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
@@ -4363,7 +4276,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
// this value when we vectorize all of the instructions that use the PHI.
BasicBlock *VectorPH =
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
- bool ScalarPHI = State.VF.isScalar() || IsInLoop;
+ bool ScalarPHI = State.VF.isScalar() || isInLoop();
Value *StartV = State.get(StartVPV, ScalarPHI);
Type *VecTy = StartV->getType();
@@ -4372,7 +4285,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
"recipe must be in the vector loop header");
auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
Phi->insertBefore(HeaderBB->getFirstInsertionPt());
- State.set(this, Phi, IsInLoop);
+ State.set(this, Phi, isInLoop());
Phi->addIncoming(StartV, VectorPH);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4bf2eb3765080..beb867cc49553 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3477,7 +3477,7 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx);
InstructionCost RedCost = Red->computeCost(VF, Ctx);
- if (isa<VPPartialReductionRecipe>(Red)) {
+ if (Red->isPartialReduction()) {
TargetTransformInfo::PartialReductionExtendKind ExtKind =
TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
// FIXME: Move partial reduction creation, costing and clamping
@@ -3518,7 +3518,7 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
static VPExpressionRecipe *
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
VPCostContext &Ctx, VFRange &Range) {
- bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red);
+ bool IsPartialReduction = Red->isPartialReduction();
unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
@@ -3537,7 +3537,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
InstructionCost MulAccCost;
- if (IsPartialReduction) {
+ if (Red->isPartialReduction()) {
Type *SrcTy2 =
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
// FIXME: Move partial reduction creation, costing and clamping
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 059993043dcda..151b6f663b176 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -145,7 +145,7 @@ unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
return 1;
if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
return RR->getVFScaleFactor();
- if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
+ if (auto *RR = dyn_cast<VPReductionRecipe>(R))
return RR->getVFScaleFactor();
assert(
(!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 0678bc90ef4b5..d839011bca5a4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -341,7 +341,6 @@ class VPDef {
VPInterleaveSC,
VPReductionEVLSC,
VPReductionSC,
- VPPartialReductionSC,
VPReplicateSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
index d8f1a86c9ebda..1b523940cbf20 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
@@ -182,316 +182,236 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE62:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE62]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[PRED_LOAD_CONTINUE62]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <16 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x i8> poison, i8 [[TMP19]], i32 0
+; CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP101:%.*]] = load i8, ptr [[TMP99]], align 1
+; CHECK-NEXT: [[TMP102:%.*]] = insertelement <16 x i8> poison, i8 [[TMP101]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP21:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP20]], [[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP103:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP102]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP23]], align 1
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[TMP24]], i32 1
+; CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
+; CHECK-NEXT: [[TMP109:%.*]] = insertelement <16 x i8> [[TMP103]], i8 [[TMP105]], i32 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP26:%.*]] = phi <16 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP25]], [[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP111:%.*]] = phi <16 x i8> [ [[TMP103]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP109]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[TMP29]], i32 2
+; CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP113:%.*]] = load i8, ptr [[TMP112]], align 1
+; CHECK-NEXT: [[TMP114:%.*]] = insertelement <16 x i8> [[TMP111]], i8 [[TMP113]], i32 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP31:%.*]] = phi <16 x i8> [ [[TMP26]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP30]], [[PRED_LOAD_IF3]] ]
+; CHECK-NEXT: [[TMP115:%.*]] = phi <16 x i8> [ [[TMP111]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP114]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if5:
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[TMP34]], i32 3
+; CHECK-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP121:%.*]] = load i8, ptr [[TMP119]], align 1
+; CHECK-NEXT: [[TMP122:%.*]] = insertelement <16 x i8> [[TMP115]], i8 [[TMP121]], i32 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP36:%.*]] = phi <16 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], [[PRED_LOAD_IF5]] ]
+; CHECK-NEXT: [[TMP123:%.*]] = phi <16 x i8> [ [[TMP115]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP122]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
; CHECK: pred.load.if7:
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i8> [[TMP36]], i8 [[TMP39]], i32 4
+; CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
+; CHECK-NEXT: [[TMP129:%.*]] = insertelement <16 x i8> [[TMP123]], i8 [[TMP125]], i32 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP41:%.*]] = phi <16 x i8> [ [[TMP36]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP40]], [[PRED_LOAD_IF7]] ]
+; CHECK-NEXT: [[TMP131:%.*]] = phi <16 x i8> [ [[TMP123]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP129]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <16 x i8> [[TMP41]], i8 [[TMP44]], i32 5
+; CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP133:%.*]] = load i8, ptr [[TMP132]], align 1
+; CHECK-NEXT: [[TMP134:%.*]] = insertelement <16 x i8> [[TMP131]], i8 [[TMP133]], i32 5
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]]
; CHECK: pred.load.continue10:
; CHECK-NEXT: [[TMP46:%.*]] = phi <16 x i8> [ [[TMP41]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP45]], [[PRED_LOAD_IF9]] ]
+; CHECK-NEXT: [[TMP135:%.*]] = phi <16 x i8> [ [[TMP131]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP134]], [[PRED_LOAD_IF9]] ]
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
; CHECK-NEXT: br i1 [[TMP47]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK: pred.load.if11:
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP46]], i8 [[TMP49]], i32 6
+; CHECK-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP141:%.*]] = load i8, ptr [[TMP139]], align 1
+; CHECK-NEXT: [[TMP142:%.*]] = insertelement <16 x i8> [[TMP135]], i8 [[TMP141]], i32 6
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK: pred.load.continue12:
; CHECK-NEXT: [[TMP51:%.*]] = phi <16 x i8> [ [[TMP46]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP50]], [[PRED_LOAD_IF11]] ]
+; CHECK-NEXT: [[TMP143:%.*]] = phi <16 x i8> [ [[TMP135]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP142]], [[PRED_LOAD_IF11]] ]
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
; CHECK-NEXT: br i1 [[TMP52]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
; CHECK: pred.load.if13:
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP54]], i32 7
+; CHECK-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
+; CHECK-NEXT: [[TMP149:%.*]] = insertelement <16 x i8> [[TMP143]], i8 [[TMP145]], i32 7
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.continue14:
; CHECK-NEXT: [[TMP56:%.*]] = phi <16 x i8> [ [[TMP51]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP55]], [[PRED_LOAD_IF13]] ]
+; CHECK-NEXT: [[TMP150:%.*]] = phi <16 x i8> [ [[TMP143]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP149]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP57:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
; CHECK-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK: pred.load.if15:
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP59]], i32 8
+; CHECK-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP152:%.*]] = load i8, ptr [[TMP151]], align 1
+; CHECK-NEXT: [[TMP153:%.*]] = insertelement <16 x i8> [[TMP150]], i8 [[TMP152]], i32 8
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; CHECK: pred.load.continue16:
; CHECK-NEXT: [[TMP61:%.*]] = phi <16 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP60]], [[PRED_LOAD_IF15]] ]
+; CHECK-NEXT: [[TMP154:%.*]] = phi <16 x i8> [ [[TMP150]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP153]], [[PRED_LOAD_IF15]] ]
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
; CHECK-NEXT: br i1 [[TMP62]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
; CHECK: pred.load.if17:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP64:%.*]] = load i8, ptr [[TMP63]], align 1
; CHECK-NEXT: [[TMP65:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP64]], i32 9
+; CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP155:%.*]] = load i8, ptr [[TMP96]], align 1
+; CHECK-NEXT: [[TMP98:%.*]] = insertelement <16 x i8> [[TMP154]], i8 [[TMP155]], i32 9
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE18]]
; CHECK: pred.load.continue18:
; CHECK-NEXT: [[TMP66:%.*]] = phi <16 x i8> [ [[TMP61]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP65]], [[PRED_LOAD_IF17]] ]
+; CHECK-NEXT: [[TMP100:%.*]] = phi <16 x i8> [ [[TMP154]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP98]], [[PRED_LOAD_IF17]] ]
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
; CHECK-NEXT: br i1 [[TMP67]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
; CHECK: pred.load.if19:
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP69:%.*]] = load i8, ptr [[TMP68]], align 1
; CHECK-NEXT: [[TMP70:%.*]] = insertelement <16 x i8> [[TMP66]], i8 [[TMP69]], i32 10
+; CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP107:%.*]] = load i8, ptr [[TMP106]], align 1
+; CHECK-NEXT: [[TMP108:%.*]] = insertelement <16 x i8> [[TMP100]], i8 [[TMP107]], i32 10
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE20]]
; CHECK: pred.load.continue20:
; CHECK-NEXT: [[TMP71:%.*]] = phi <16 x i8> [ [[TMP66]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP70]], [[PRED_LOAD_IF19]] ]
+; CHECK-NEXT: [[TMP110:%.*]] = phi <16 x i8> [ [[TMP100]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP108]], [[PRED_LOAD_IF19]] ]
; CHECK-NEXT: [[TMP72:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
; CHECK-NEXT: br i1 [[TMP72]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
; CHECK: pred.load.if21:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
; CHECK-NEXT: [[TMP75:%.*]] = insertelement <16 x i8> [[TMP71]], i8 [[TMP74]], i32 11
+; CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP117:%.*]] = load i8, ptr [[TMP116]], align 1
+; CHECK-NEXT: [[TMP118:%.*]] = insertelement <16 x i8> [[TMP110]], i8 [[TMP117]], i32 11
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE22]]
; CHECK: pred.load.continue22:
; CHECK-NEXT: [[TMP76:%.*]] = phi <16 x i8> [ [[TMP71]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP75]], [[PRED_LOAD_IF21]] ]
+; CHECK-NEXT: [[TMP120:%.*]] = phi <16 x i8> [ [[TMP110]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP118]], [[PRED_LOAD_IF21]] ]
; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
; CHECK-NEXT: br i1 [[TMP77]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
; CHECK: pred.load.if23:
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1
; CHECK-NEXT: [[TMP80:%.*]] = insertelement <16 x i8> [[TMP76]], i8 [[TMP79]], i32 12
+; CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP127:%.*]] = load i8, ptr [[TMP126]], align 1
+; CHECK-NEXT: [[TMP128:%.*]] = insertelement <16 x i8> [[TMP120]], i8 [[TMP127]], i32 12
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE24]]
; CHECK: pred.load.continue24:
; CHECK-NEXT: [[TMP81:%.*]] = phi <16 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP80]], [[PRED_LOAD_IF23]] ]
+; CHECK-NEXT: [[TMP130:%.*]] = phi <16 x i8> [ [[TMP120]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP128]], [[PRED_LOAD_IF23]] ]
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
; CHECK-NEXT: br i1 [[TMP82]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
; CHECK: pred.load.if25:
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
; CHECK-NEXT: [[TMP85:%.*]] = insertelement <16 x i8> [[TMP81]], i8 [[TMP84]], i32 13
+; CHECK-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT: [[TMP137:%.*]] = load i8, ptr [[TMP136]], align 1
+; CHECK-NEXT: [[TMP138:%.*]] = insertelement <16 x i8> [[TMP130]], i8 [[TMP137]], i32 13
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE26]]
; CHECK: pred.load.continue26:
; CHECK-NEXT: [[TMP86:%.*]] = phi <16 x i8> [ [[TMP81]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
+; CHECK-NEXT: [[TMP140:%.*]] = phi <16 x i8> [ [[TMP130]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP138]], [[PRED_LOAD_IF25]] ]
; CHECK-NEXT: [[TMP87:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
; CHECK-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
; CHECK: pred.load.if27:
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
; CHECK-NEXT: [[TMP90:%.*]] = insertelement <16 x i8> [[TMP86]], i8 [[TMP89]], i32 14
+; CHECK-NEXT: [[TMP146:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP147:%.*]] = load i8, ptr [[TMP146]], align 1
+; CHECK-NEXT: [[TMP148:%.*]] = insertelement <16 x i8> [[TMP140]], i8 [[TMP147]], i32 14
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE28]]
; CHECK: pred.load.continue28:
; CHECK-NEXT: [[TMP91:%.*]] = phi <16 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP90]], [[PRED_LOAD_IF27]] ]
+; CHECK-NEXT: [[TMP172:%.*]] = phi <16 x i8> [ [[TMP140]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP148]], [[PRED_LOAD_IF27]] ]
; CHECK-NEXT: [[TMP92:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
-; CHECK-NEXT: br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
+; CHECK-NEXT: br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE62]]
; CHECK: pred.load.if29:
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP94:%.*]] = load i8, ptr [[TMP93]], align 1
; CHECK-NEXT: [[TMP95:%.*]] = insertelement <16 x i8> [[TMP91]], i8 [[TMP94]], i32 15
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE30]]
-; CHECK: pred.load.continue30:
-; CHECK-NEXT: [[TMP96:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
-; CHECK-NEXT: [[TMP97:%.*]] = sext <16 x i8> [[TMP96]] to <16 x i32>
-; CHECK-NEXT: [[TMP98:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
-; CHECK-NEXT: br i1 [[TMP98]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
-; CHECK: pred.load.if31:
-; CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP100:%.*]] = load i8, ptr [[TMP99]], align 1
-; CHECK-NEXT: [[TMP101:%.*]] = insertelement <16 x i8> poison, i8 [[TMP100]], i32 0
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE32]]
-; CHECK: pred.load.continue32:
-; CHECK-NEXT: [[TMP102:%.*]] = phi <16 x i8> [ poison, [[PRED_LOAD_CONTINUE30]] ], [ [[TMP101]], [[PRED_LOAD_IF31]] ]
-; CHECK-NEXT: [[TMP103:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
-; CHECK-NEXT: br i1 [[TMP103]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
-; CHECK: pred.load.if33:
-; CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
-; CHECK-NEXT: [[TMP106:%.*]] = insertelement <16 x i8> [[TMP102]], i8 [[TMP105]], i32 1
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE34]]
-; CHECK: pred.load.continue34:
-; CHECK-NEXT: [[TMP107:%.*]] = phi <16 x i8> [ [[TMP102]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP106]], [[PRED_LOAD_IF33]] ]
-; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
-; CHECK-NEXT: br i1 [[TMP108]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
-; CHECK: pred.load.if35:
-; CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP110:%.*]] = load i8, ptr [[TMP109]], align 1
-; CHECK-NEXT: [[TMP111:%.*]] = insertelement <16 x i8> [[TMP107]], i8 [[TMP110]], i32 2
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE36]]
-; CHECK: pred.load.continue36:
-; CHECK-NEXT: [[TMP112:%.*]] = phi <16 x i8> [ [[TMP107]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP111]], [[PRED_LOAD_IF35]] ]
-; CHECK-NEXT: [[TMP113:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
-; CHECK-NEXT: br i1 [[TMP113]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38:%.*]]
-; CHECK: pred.load.if37:
-; CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP115:%.*]] = load i8, ptr [[TMP114]], align 1
-; CHECK-NEXT: [[TMP116:%.*]] = insertelement <16 x i8> [[TMP112]], i8 [[TMP115]], i32 3
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE38]]
-; CHECK: pred.load.continue38:
-; CHECK-NEXT: [[TMP117:%.*]] = phi <16 x i8> [ [[TMP112]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP116]], [[PRED_LOAD_IF37]] ]
-; CHECK-NEXT: [[TMP118:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
-; CHECK-NEXT: br i1 [[TMP118]], label [[PRED_LOAD_IF39:%.*]], label [[PRED_LOAD_CONTINUE40:%.*]]
-; CHECK: pred.load.if39:
-; CHECK-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1
-; CHECK-NEXT: [[TMP121:%.*]] = insertelement <16 x i8> [[TMP117]], i8 [[TMP120]], i32 4
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE40]]
-; CHECK: pred.load.continue40:
-; CHECK-NEXT: [[TMP122:%.*]] = phi <16 x i8> [ [[TMP117]], [[PRED_LOAD_CONTINUE38]] ], [ [[TMP121]], [[PRED_LOAD_IF39]] ]
-; CHECK-NEXT: [[TMP123:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
-; CHECK-NEXT: br i1 [[TMP123]], label [[PRED_LOAD_IF41:%.*]], label [[PRED_LOAD_CONTINUE42:%.*]]
-; CHECK: pred.load.if41:
-; CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
-; CHECK-NEXT: [[TMP126:%.*]] = insertelement <16 x i8> [[TMP122]], i8 [[TMP125]], i32 5
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE42]]
-; CHECK: pred.load.continue42:
-; CHECK-NEXT: [[TMP127:%.*]] = phi <16 x i8> [ [[TMP122]], [[PRED_LOAD_CONTINUE40]] ], [ [[TMP126]], [[PRED_LOAD_IF41]] ]
-; CHECK-NEXT: [[TMP128:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
-; CHECK-NEXT: br i1 [[TMP128]], label [[PRED_LOAD_IF43:%.*]], label [[PRED_LOAD_CONTINUE44:%.*]]
-; CHECK: pred.load.if43:
-; CHECK-NEXT: [[TMP129:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP130:%.*]] = load i8, ptr [[TMP129]], align 1
-; CHECK-NEXT: [[TMP131:%.*]] = insertelement <16 x i8> [[TMP127]], i8 [[TMP130]], i32 6
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE44]]
-; CHECK: pred.load.continue44:
-; CHECK-NEXT: [[TMP132:%.*]] = phi <16 x i8> [ [[TMP127]], [[PRED_LOAD_CONTINUE42]] ], [ [[TMP131]], [[PRED_LOAD_IF43]] ]
-; CHECK-NEXT: [[TMP133:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
-; CHECK-NEXT: br i1 [[TMP133]], label [[PRED_LOAD_IF45:%.*]], label [[PRED_LOAD_CONTINUE46:%.*]]
-; CHECK: pred.load.if45:
-; CHECK-NEXT: [[TMP134:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP135:%.*]] = load i8, ptr [[TMP134]], align 1
-; CHECK-NEXT: [[TMP136:%.*]] = insertelement <16 x i8> [[TMP132]], i8 [[TMP135]], i32 7
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE46]]
-; CHECK: pred.load.continue46:
-; CHECK-NEXT: [[TMP137:%.*]] = phi <16 x i8> [ [[TMP132]], [[PRED_LOAD_CONTINUE44]] ], [ [[TMP136]], [[PRED_LOAD_IF45]] ]
-; CHECK-NEXT: [[TMP138:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
-; CHECK-NEXT: br i1 [[TMP138]], label [[PRED_LOAD_IF47:%.*]], label [[PRED_LOAD_CONTINUE48:%.*]]
-; CHECK: pred.load.if47:
-; CHECK-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP140:%.*]] = load i8, ptr [[TMP139]], align 1
-; CHECK-NEXT: [[TMP141:%.*]] = insertelement <16 x i8> [[TMP137]], i8 [[TMP140]], i32 8
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE48]]
-; CHECK: pred.load.continue48:
-; CHECK-NEXT: [[TMP142:%.*]] = phi <16 x i8> [ [[TMP137]], [[PRED_LOAD_CONTINUE46]] ], [ [[TMP141]], [[PRED_LOAD_IF47]] ]
-; CHECK-NEXT: [[TMP143:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
-; CHECK-NEXT: br i1 [[TMP143]], label [[PRED_LOAD_IF49:%.*]], label [[PRED_LOAD_CONTINUE50:%.*]]
-; CHECK: pred.load.if49:
-; CHECK-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
-; CHECK-NEXT: [[TMP146:%.*]] = insertelement <16 x i8> [[TMP142]], i8 [[TMP145]], i32 9
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE50]]
-; CHECK: pred.load.continue50:
-; CHECK-NEXT: [[TMP147:%.*]] = phi <16 x i8> [ [[TMP142]], [[PRED_LOAD_CONTINUE48]] ], [ [[TMP146]], [[PRED_LOAD_IF49]] ]
-; CHECK-NEXT: [[TMP148:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
-; CHECK-NEXT: br i1 [[TMP148]], label [[PRED_LOAD_IF51:%.*]], label [[PRED_LOAD_CONTINUE52:%.*]]
-; CHECK: pred.load.if51:
-; CHECK-NEXT: [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-NEXT: [[TMP150:%.*]] = load i8, ptr [[TMP149]], align 1
-; CHECK-NEXT: [[TMP151:%.*]] = insertelement <16 x i8> [[TMP147]], i8 [[TMP150]], i32 10
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE52]]
-; CHECK: pred.load.continue52:
-; CHECK-NEXT: [[TMP152:%.*]] = phi <16 x i8> [ [[TMP147]], [[PRED_LOAD_CONTINUE50]] ], [ [[TMP151]], [[PRED_LOAD_IF51]] ]
-; CHECK-NEXT: [[TMP153:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
-; CHECK-NEXT: br i1 [[TMP153]], label [[PRED_LOAD_IF53:%.*]], label [[PRED_LOAD_CONTINUE54:%.*]]
-; CHECK: pred.load.if53:
-; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP155:%.*]] = load i8, ptr [[TMP154]], align 1
-; CHECK-NEXT: [[TMP156:%.*]] = insertelement <16 x i8> [[TMP152]], i8 [[TMP155]], i32 11
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE54]]
-; CHECK: pred.load.continue54:
-; CHECK-NEXT: [[TMP157:%.*]] = phi <16 x i8> [ [[TMP152]], [[PRED_LOAD_CONTINUE52]] ], [ [[TMP156]], [[PRED_LOAD_IF53]] ]
-; CHECK-NEXT: [[TMP158:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
-; CHECK-NEXT: br i1 [[TMP158]], label [[PRED_LOAD_IF55:%.*]], label [[PRED_LOAD_CONTINUE56:%.*]]
-; CHECK: pred.load.if55:
-; CHECK-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP160:%.*]] = load i8, ptr [[TMP159]], align 1
-; CHECK-NEXT: [[TMP161:%.*]] = insertelement <16 x i8> [[TMP157]], i8 [[TMP160]], i32 12
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE56]]
-; CHECK: pred.load.continue56:
-; CHECK-NEXT: [[TMP162:%.*]] = phi <16 x i8> [ [[TMP157]], [[PRED_LOAD_CONTINUE54]] ], [ [[TMP161]], [[PRED_LOAD_IF55]] ]
-; CHECK-NEXT: [[TMP163:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
-; CHECK-NEXT: br i1 [[TMP163]], label [[PRED_LOAD_IF57:%.*]], label [[PRED_LOAD_CONTINUE58:%.*]]
-; CHECK: pred.load.if57:
-; CHECK-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP165:%.*]] = load i8, ptr [[TMP164]], align 1
-; CHECK-NEXT: [[TMP166:%.*]] = insertelement <16 x i8> [[TMP162]], i8 [[TMP165]], i32 13
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE58]]
-; CHECK: pred.load.continue58:
-; CHECK-NEXT: [[TMP167:%.*]] = phi <16 x i8> [ [[TMP162]], [[PRED_LOAD_CONTINUE56]] ], [ [[TMP166]], [[PRED_LOAD_IF57]] ]
-; CHECK-NEXT: [[TMP168:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
-; CHECK-NEXT: br i1 [[TMP168]], label [[PRED_LOAD_IF59:%.*]], label [[PRED_LOAD_CONTINUE60:%.*]]
-; CHECK: pred.load.if59:
-; CHECK-NEXT: [[TMP169:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP170:%.*]] = load i8, ptr [[TMP169]], align 1
-; CHECK-NEXT: [[TMP171:%.*]] = insertelement <16 x i8> [[TMP167]], i8 [[TMP170]], i32 14
-; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE60]]
-; CHECK: pred.load.continue60:
-; CHECK-NEXT: [[TMP172:%.*]] = phi <16 x i8> [ [[TMP167]], [[PRED_LOAD_CONTINUE58]] ], [ [[TMP171]], [[PRED_LOAD_IF59]] ]
-; CHECK-NEXT: [[TMP173:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
-; CHECK-NEXT: br i1 [[TMP173]], label [[PRED_LOAD_IF61:%.*]], label [[PRED_LOAD_CONTINUE62]]
-; CHECK: pred.load.if61:
; CHECK-NEXT: [[TMP174:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP175:%.*]] = load i8, ptr [[TMP174]], align 1
; CHECK-NEXT: [[TMP176:%.*]] = insertelement <16 x i8> [[TMP172]], i8 [[TMP175]], i32 15
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE62]]
-; CHECK: pred.load.continue62:
-; CHECK-NEXT: [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE60]] ], [ [[TMP176]], [[PRED_LOAD_IF61]] ]
+; CHECK: pred.load.continue30:
+; CHECK-NEXT: [[TMP159:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
+; CHECK-NEXT: [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP176]], [[PRED_LOAD_IF29]] ]
; CHECK-NEXT: [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32>
+; CHECK-NEXT: [[TMP97:%.*]] = sext <16 x i8> [[TMP159]] to <16 x i32>
; CHECK-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
-; CHECK-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
+; CHECK-NEXT: [[TMP183:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP183]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll
index b84763142b686..b30f18f4f5b57 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll
@@ -991,313 +991,233 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE62:%.*]] ]
; CHECK-INTERLEAVE1-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE62]] ]
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[PRED_LOAD_CONTINUE62]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = icmp ule <16 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP17]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK-INTERLEAVE1: pred.load.if:
+; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = insertelement <16 x i8> poison, i8 [[TMP19]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP101:%.*]] = load i8, ptr [[TMP99]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP102:%.*]] = insertelement <16 x i8> poison, i8 [[TMP101]], i32 0
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK-INTERLEAVE1: pred.load.continue:
; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP20]], [[PRED_LOAD_IF]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP103:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP102]], [[PRED_LOAD_IF]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK-INTERLEAVE1: pred.load.if1:
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP23]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[TMP24]], i32 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP109:%.*]] = insertelement <16 x i8> [[TMP103]], i8 [[TMP105]], i32 1
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK-INTERLEAVE1: pred.load.continue2:
; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = phi <16 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP25]], [[PRED_LOAD_IF1]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP111:%.*]] = phi <16 x i8> [ [[TMP103]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP109]], [[PRED_LOAD_IF1]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK-INTERLEAVE1: pred.load.if3:
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[TMP29]], i32 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP113:%.*]] = load i8, ptr [[TMP112]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP114:%.*]] = insertelement <16 x i8> [[TMP111]], i8 [[TMP113]], i32 2
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK-INTERLEAVE1: pred.load.continue4:
; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = phi <16 x i8> [ [[TMP26]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP30]], [[PRED_LOAD_IF3]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP115:%.*]] = phi <16 x i8> [ [[TMP111]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP114]], [[PRED_LOAD_IF3]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK-INTERLEAVE1: pred.load.if5:
+; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[TMP34]], i32 3
+; CHECK-INTERLEAVE1-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP121:%.*]] = load i8, ptr [[TMP119]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP122:%.*]] = insertelement <16 x i8> [[TMP115]], i8 [[TMP121]], i32 3
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK-INTERLEAVE1: pred.load.continue6:
; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = phi <16 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], [[PRED_LOAD_IF5]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP123:%.*]] = phi <16 x i8> [ [[TMP115]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP122]], [[PRED_LOAD_IF5]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP37]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
; CHECK-INTERLEAVE1: pred.load.if7:
+; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP40:%.*]] = insertelement <16 x i8> [[TMP36]], i8 [[TMP39]], i32 4
+; CHECK-INTERLEAVE1-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP129:%.*]] = insertelement <16 x i8> [[TMP123]], i8 [[TMP125]], i32 4
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK-INTERLEAVE1: pred.load.continue8:
; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = phi <16 x i8> [ [[TMP36]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP40]], [[PRED_LOAD_IF7]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP131:%.*]] = phi <16 x i8> [ [[TMP123]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP129]], [[PRED_LOAD_IF7]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP42]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK-INTERLEAVE1: pred.load.if9:
+; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = insertelement <16 x i8> [[TMP41]], i8 [[TMP44]], i32 5
+; CHECK-INTERLEAVE1-NEXT: [[TMP132:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP133:%.*]] = load i8, ptr [[TMP132]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP134:%.*]] = insertelement <16 x i8> [[TMP131]], i8 [[TMP133]], i32 5
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE10]]
; CHECK-INTERLEAVE1: pred.load.continue10:
; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = phi <16 x i8> [ [[TMP41]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP45]], [[PRED_LOAD_IF9]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP135:%.*]] = phi <16 x i8> [ [[TMP131]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP134]], [[PRED_LOAD_IF9]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP47]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK-INTERLEAVE1: pred.load.if11:
+; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP46]], i8 [[TMP49]], i32 6
+; CHECK-INTERLEAVE1-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP141:%.*]] = load i8, ptr [[TMP139]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP142:%.*]] = insertelement <16 x i8> [[TMP135]], i8 [[TMP141]], i32 6
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK-INTERLEAVE1: pred.load.continue12:
; CHECK-INTERLEAVE1-NEXT: [[TMP51:%.*]] = phi <16 x i8> [ [[TMP46]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP50]], [[PRED_LOAD_IF11]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP143:%.*]] = phi <16 x i8> [ [[TMP135]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP142]], [[PRED_LOAD_IF11]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP52]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
; CHECK-INTERLEAVE1: pred.load.if13:
+; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
; CHECK-INTERLEAVE1-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
; CHECK-INTERLEAVE1-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP54]], i32 7
+; CHECK-INTERLEAVE1-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP149:%.*]] = insertelement <16 x i8> [[TMP143]], i8 [[TMP145]], i32 7
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK-INTERLEAVE1: pred.load.continue14:
; CHECK-INTERLEAVE1-NEXT: [[TMP56:%.*]] = phi <16 x i8> [ [[TMP51]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP55]], [[PRED_LOAD_IF13]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP150:%.*]] = phi <16 x i8> [ [[TMP143]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP149]], [[PRED_LOAD_IF13]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP57:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK-INTERLEAVE1: pred.load.if15:
+; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP8]]
; CHECK-INTERLEAVE1-NEXT: [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP59]], i32 8
+; CHECK-INTERLEAVE1-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP152:%.*]] = load i8, ptr [[TMP151]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP153:%.*]] = insertelement <16 x i8> [[TMP150]], i8 [[TMP152]], i32 8
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; CHECK-INTERLEAVE1: pred.load.continue16:
; CHECK-INTERLEAVE1-NEXT: [[TMP61:%.*]] = phi <16 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP60]], [[PRED_LOAD_IF15]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP154:%.*]] = phi <16 x i8> [ [[TMP150]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP153]], [[PRED_LOAD_IF15]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP62:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP62]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
; CHECK-INTERLEAVE1: pred.load.if17:
+; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
; CHECK-INTERLEAVE1-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
; CHECK-INTERLEAVE1-NEXT: [[TMP64:%.*]] = load i8, ptr [[TMP63]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP65:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP64]], i32 9
+; CHECK-INTERLEAVE1-NEXT: [[TMP96:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP155:%.*]] = load i8, ptr [[TMP96]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP98:%.*]] = insertelement <16 x i8> [[TMP154]], i8 [[TMP155]], i32 9
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE18]]
; CHECK-INTERLEAVE1: pred.load.continue18:
; CHECK-INTERLEAVE1-NEXT: [[TMP66:%.*]] = phi <16 x i8> [ [[TMP61]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP65]], [[PRED_LOAD_IF17]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP100:%.*]] = phi <16 x i8> [ [[TMP154]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP98]], [[PRED_LOAD_IF17]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP67]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
; CHECK-INTERLEAVE1: pred.load.if19:
+; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
; CHECK-INTERLEAVE1-NEXT: [[TMP68:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
; CHECK-INTERLEAVE1-NEXT: [[TMP69:%.*]] = load i8, ptr [[TMP68]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP70:%.*]] = insertelement <16 x i8> [[TMP66]], i8 [[TMP69]], i32 10
+; CHECK-INTERLEAVE1-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP107:%.*]] = load i8, ptr [[TMP106]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP108:%.*]] = insertelement <16 x i8> [[TMP100]], i8 [[TMP107]], i32 10
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE20]]
; CHECK-INTERLEAVE1: pred.load.continue20:
; CHECK-INTERLEAVE1-NEXT: [[TMP71:%.*]] = phi <16 x i8> [ [[TMP66]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP70]], [[PRED_LOAD_IF19]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP110:%.*]] = phi <16 x i8> [ [[TMP100]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP108]], [[PRED_LOAD_IF19]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP72:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP72]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
; CHECK-INTERLEAVE1: pred.load.if21:
+; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
; CHECK-INTERLEAVE1-NEXT: [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
; CHECK-INTERLEAVE1-NEXT: [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP75:%.*]] = insertelement <16 x i8> [[TMP71]], i8 [[TMP74]], i32 11
+; CHECK-INTERLEAVE1-NEXT: [[TMP116:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP117:%.*]] = load i8, ptr [[TMP116]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP118:%.*]] = insertelement <16 x i8> [[TMP110]], i8 [[TMP117]], i32 11
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE22]]
; CHECK-INTERLEAVE1: pred.load.continue22:
; CHECK-INTERLEAVE1-NEXT: [[TMP76:%.*]] = phi <16 x i8> [ [[TMP71]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP75]], [[PRED_LOAD_IF21]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP120:%.*]] = phi <16 x i8> [ [[TMP110]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP118]], [[PRED_LOAD_IF21]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP77:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP77]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
; CHECK-INTERLEAVE1: pred.load.if23:
+; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
; CHECK-INTERLEAVE1-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; CHECK-INTERLEAVE1-NEXT: [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP80:%.*]] = insertelement <16 x i8> [[TMP76]], i8 [[TMP79]], i32 12
+; CHECK-INTERLEAVE1-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP127:%.*]] = load i8, ptr [[TMP126]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP128:%.*]] = insertelement <16 x i8> [[TMP120]], i8 [[TMP127]], i32 12
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE24]]
; CHECK-INTERLEAVE1: pred.load.continue24:
; CHECK-INTERLEAVE1-NEXT: [[TMP81:%.*]] = phi <16 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP80]], [[PRED_LOAD_IF23]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP130:%.*]] = phi <16 x i8> [ [[TMP120]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP128]], [[PRED_LOAD_IF23]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP82:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP82]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
; CHECK-INTERLEAVE1: pred.load.if25:
+; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-INTERLEAVE1-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]]
; CHECK-INTERLEAVE1-NEXT: [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP85:%.*]] = insertelement <16 x i8> [[TMP81]], i8 [[TMP84]], i32 13
+; CHECK-INTERLEAVE1-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP137:%.*]] = load i8, ptr [[TMP136]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP138:%.*]] = insertelement <16 x i8> [[TMP130]], i8 [[TMP137]], i32 13
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE26]]
; CHECK-INTERLEAVE1: pred.load.continue26:
; CHECK-INTERLEAVE1-NEXT: [[TMP86:%.*]] = phi <16 x i8> [ [[TMP81]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP140:%.*]] = phi <16 x i8> [ [[TMP130]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP138]], [[PRED_LOAD_IF25]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP87:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
; CHECK-INTERLEAVE1: pred.load.if27:
+; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-INTERLEAVE1-NEXT: [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
; CHECK-INTERLEAVE1-NEXT: [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP90:%.*]] = insertelement <16 x i8> [[TMP86]], i8 [[TMP89]], i32 14
+; CHECK-INTERLEAVE1-NEXT: [[TMP146:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP147:%.*]] = load i8, ptr [[TMP146]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP148:%.*]] = insertelement <16 x i8> [[TMP140]], i8 [[TMP147]], i32 14
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE28]]
; CHECK-INTERLEAVE1: pred.load.continue28:
; CHECK-INTERLEAVE1-NEXT: [[TMP91:%.*]] = phi <16 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP90]], [[PRED_LOAD_IF27]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP172:%.*]] = phi <16 x i8> [ [[TMP140]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP148]], [[PRED_LOAD_IF27]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP92:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
+; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE62]]
; CHECK-INTERLEAVE1: pred.load.if29:
+; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-INTERLEAVE1-NEXT: [[TMP93:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP15]]
; CHECK-INTERLEAVE1-NEXT: [[TMP94:%.*]] = load i8, ptr [[TMP93]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP95:%.*]] = insertelement <16 x i8> [[TMP91]], i8 [[TMP94]], i32 15
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE30]]
-; CHECK-INTERLEAVE1: pred.load.continue30:
-; CHECK-INTERLEAVE1-NEXT: [[TMP96:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP97:%.*]] = sext <16 x i8> [[TMP96]] to <16 x i32>
-; CHECK-INTERLEAVE1-NEXT: [[TMP98:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP98]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if31:
-; CHECK-INTERLEAVE1-NEXT: [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP100:%.*]] = load i8, ptr [[TMP99]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP101:%.*]] = insertelement <16 x i8> poison, i8 [[TMP100]], i32 0
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE32]]
-; CHECK-INTERLEAVE1: pred.load.continue32:
-; CHECK-INTERLEAVE1-NEXT: [[TMP102:%.*]] = phi <16 x i8> [ poison, [[PRED_LOAD_CONTINUE30]] ], [ [[TMP101]], [[PRED_LOAD_IF31]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP103:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP103]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if33:
-; CHECK-INTERLEAVE1-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP106:%.*]] = insertelement <16 x i8> [[TMP102]], i8 [[TMP105]], i32 1
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE34]]
-; CHECK-INTERLEAVE1: pred.load.continue34:
-; CHECK-INTERLEAVE1-NEXT: [[TMP107:%.*]] = phi <16 x i8> [ [[TMP102]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP106]], [[PRED_LOAD_IF33]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP108:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP108]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if35:
-; CHECK-INTERLEAVE1-NEXT: [[TMP109:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP110:%.*]] = load i8, ptr [[TMP109]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP111:%.*]] = insertelement <16 x i8> [[TMP107]], i8 [[TMP110]], i32 2
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE36]]
-; CHECK-INTERLEAVE1: pred.load.continue36:
-; CHECK-INTERLEAVE1-NEXT: [[TMP112:%.*]] = phi <16 x i8> [ [[TMP107]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP111]], [[PRED_LOAD_IF35]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP113:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP113]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if37:
-; CHECK-INTERLEAVE1-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP115:%.*]] = load i8, ptr [[TMP114]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP116:%.*]] = insertelement <16 x i8> [[TMP112]], i8 [[TMP115]], i32 3
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE38]]
-; CHECK-INTERLEAVE1: pred.load.continue38:
-; CHECK-INTERLEAVE1-NEXT: [[TMP117:%.*]] = phi <16 x i8> [ [[TMP112]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP116]], [[PRED_LOAD_IF37]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP118:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP118]], label [[PRED_LOAD_IF39:%.*]], label [[PRED_LOAD_CONTINUE40:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if39:
-; CHECK-INTERLEAVE1-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP121:%.*]] = insertelement <16 x i8> [[TMP117]], i8 [[TMP120]], i32 4
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE40]]
-; CHECK-INTERLEAVE1: pred.load.continue40:
-; CHECK-INTERLEAVE1-NEXT: [[TMP122:%.*]] = phi <16 x i8> [ [[TMP117]], [[PRED_LOAD_CONTINUE38]] ], [ [[TMP121]], [[PRED_LOAD_IF39]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP123:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP123]], label [[PRED_LOAD_IF41:%.*]], label [[PRED_LOAD_CONTINUE42:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if41:
-; CHECK-INTERLEAVE1-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP126:%.*]] = insertelement <16 x i8> [[TMP122]], i8 [[TMP125]], i32 5
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE42]]
-; CHECK-INTERLEAVE1: pred.load.continue42:
-; CHECK-INTERLEAVE1-NEXT: [[TMP127:%.*]] = phi <16 x i8> [ [[TMP122]], [[PRED_LOAD_CONTINUE40]] ], [ [[TMP126]], [[PRED_LOAD_IF41]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP128:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP128]], label [[PRED_LOAD_IF43:%.*]], label [[PRED_LOAD_CONTINUE44:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if43:
-; CHECK-INTERLEAVE1-NEXT: [[TMP129:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP130:%.*]] = load i8, ptr [[TMP129]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP131:%.*]] = insertelement <16 x i8> [[TMP127]], i8 [[TMP130]], i32 6
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE44]]
-; CHECK-INTERLEAVE1: pred.load.continue44:
-; CHECK-INTERLEAVE1-NEXT: [[TMP132:%.*]] = phi <16 x i8> [ [[TMP127]], [[PRED_LOAD_CONTINUE42]] ], [ [[TMP131]], [[PRED_LOAD_IF43]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP133:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP133]], label [[PRED_LOAD_IF45:%.*]], label [[PRED_LOAD_CONTINUE46:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if45:
-; CHECK-INTERLEAVE1-NEXT: [[TMP134:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP135:%.*]] = load i8, ptr [[TMP134]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP136:%.*]] = insertelement <16 x i8> [[TMP132]], i8 [[TMP135]], i32 7
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE46]]
-; CHECK-INTERLEAVE1: pred.load.continue46:
-; CHECK-INTERLEAVE1-NEXT: [[TMP137:%.*]] = phi <16 x i8> [ [[TMP132]], [[PRED_LOAD_CONTINUE44]] ], [ [[TMP136]], [[PRED_LOAD_IF45]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP138:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP138]], label [[PRED_LOAD_IF47:%.*]], label [[PRED_LOAD_CONTINUE48:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if47:
-; CHECK-INTERLEAVE1-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP140:%.*]] = load i8, ptr [[TMP139]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP141:%.*]] = insertelement <16 x i8> [[TMP137]], i8 [[TMP140]], i32 8
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE48]]
-; CHECK-INTERLEAVE1: pred.load.continue48:
-; CHECK-INTERLEAVE1-NEXT: [[TMP142:%.*]] = phi <16 x i8> [ [[TMP137]], [[PRED_LOAD_CONTINUE46]] ], [ [[TMP141]], [[PRED_LOAD_IF47]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP143:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP143]], label [[PRED_LOAD_IF49:%.*]], label [[PRED_LOAD_CONTINUE50:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if49:
-; CHECK-INTERLEAVE1-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP146:%.*]] = insertelement <16 x i8> [[TMP142]], i8 [[TMP145]], i32 9
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE50]]
-; CHECK-INTERLEAVE1: pred.load.continue50:
-; CHECK-INTERLEAVE1-NEXT: [[TMP147:%.*]] = phi <16 x i8> [ [[TMP142]], [[PRED_LOAD_CONTINUE48]] ], [ [[TMP146]], [[PRED_LOAD_IF49]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP148:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP148]], label [[PRED_LOAD_IF51:%.*]], label [[PRED_LOAD_CONTINUE52:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if51:
-; CHECK-INTERLEAVE1-NEXT: [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP150:%.*]] = load i8, ptr [[TMP149]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP151:%.*]] = insertelement <16 x i8> [[TMP147]], i8 [[TMP150]], i32 10
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE52]]
-; CHECK-INTERLEAVE1: pred.load.continue52:
-; CHECK-INTERLEAVE1-NEXT: [[TMP152:%.*]] = phi <16 x i8> [ [[TMP147]], [[PRED_LOAD_CONTINUE50]] ], [ [[TMP151]], [[PRED_LOAD_IF51]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP153:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP153]], label [[PRED_LOAD_IF53:%.*]], label [[PRED_LOAD_CONTINUE54:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if53:
-; CHECK-INTERLEAVE1-NEXT: [[TMP154:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP155:%.*]] = load i8, ptr [[TMP154]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP156:%.*]] = insertelement <16 x i8> [[TMP152]], i8 [[TMP155]], i32 11
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE54]]
-; CHECK-INTERLEAVE1: pred.load.continue54:
-; CHECK-INTERLEAVE1-NEXT: [[TMP157:%.*]] = phi <16 x i8> [ [[TMP152]], [[PRED_LOAD_CONTINUE52]] ], [ [[TMP156]], [[PRED_LOAD_IF53]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP158:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP158]], label [[PRED_LOAD_IF55:%.*]], label [[PRED_LOAD_CONTINUE56:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if55:
-; CHECK-INTERLEAVE1-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP160:%.*]] = load i8, ptr [[TMP159]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP161:%.*]] = insertelement <16 x i8> [[TMP157]], i8 [[TMP160]], i32 12
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE56]]
-; CHECK-INTERLEAVE1: pred.load.continue56:
-; CHECK-INTERLEAVE1-NEXT: [[TMP162:%.*]] = phi <16 x i8> [ [[TMP157]], [[PRED_LOAD_CONTINUE54]] ], [ [[TMP161]], [[PRED_LOAD_IF55]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP163:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP163]], label [[PRED_LOAD_IF57:%.*]], label [[PRED_LOAD_CONTINUE58:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if57:
-; CHECK-INTERLEAVE1-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP165:%.*]] = load i8, ptr [[TMP164]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP166:%.*]] = insertelement <16 x i8> [[TMP162]], i8 [[TMP165]], i32 13
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE58]]
-; CHECK-INTERLEAVE1: pred.load.continue58:
-; CHECK-INTERLEAVE1-NEXT: [[TMP167:%.*]] = phi <16 x i8> [ [[TMP162]], [[PRED_LOAD_CONTINUE56]] ], [ [[TMP166]], [[PRED_LOAD_IF57]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP168:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP168]], label [[PRED_LOAD_IF59:%.*]], label [[PRED_LOAD_CONTINUE60:%.*]]
-; CHECK-INTERLEAVE1: pred.load.if59:
-; CHECK-INTERLEAVE1-NEXT: [[TMP169:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP170:%.*]] = load i8, ptr [[TMP169]], align 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP171:%.*]] = insertelement <16 x i8> [[TMP167]], i8 [[TMP170]], i32 14
-; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE60]]
-; CHECK-INTERLEAVE1: pred.load.continue60:
-; CHECK-INTERLEAVE1-NEXT: [[TMP172:%.*]] = phi <16 x i8> [ [[TMP167]], [[PRED_LOAD_CONTINUE58]] ], [ [[TMP171]], [[PRED_LOAD_IF59]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP173:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
-; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP173]], label [[PRED_LOAD_IF61:%.*]], label [[PRED_LOAD_CONTINUE62]]
-; CHECK-INTERLEAVE1: pred.load.if61:
; CHECK-INTERLEAVE1-NEXT: [[TMP174:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP15]]
; CHECK-INTERLEAVE1-NEXT: [[TMP175:%.*]] = load i8, ptr [[TMP174]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP176:%.*]] = insertelement <16 x i8> [[TMP172]], i8 [[TMP175]], i32 15
; CHECK-INTERLEAVE1-NEXT: br label [[PRED_LOAD_CONTINUE62]]
-; CHECK-INTERLEAVE1: pred.load.continue62:
-; CHECK-INTERLEAVE1-NEXT: [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE60]] ], [ [[TMP176]], [[PRED_LOAD_IF61]] ]
+; CHECK-INTERLEAVE1: pred.load.continue30:
+; CHECK-INTERLEAVE1-NEXT: [[TMP159:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP176]], [[PRED_LOAD_IF29]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP97:%.*]] = sext <16 x i8> [[TMP159]] to <16 x i32>
; CHECK-INTERLEAVE1-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
; CHECK-INTERLEAVE1-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
@@ -1327,313 +1247,233 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE62:%.*]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE62]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[PRED_LOAD_CONTINUE62]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = icmp ule <16 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP17]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK-INTERLEAVED: pred.load.if:
+; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = insertelement <16 x i8> poison, i8 [[TMP19]], i32 0
+; CHECK-INTERLEAVED-NEXT: [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT: [[TMP101:%.*]] = load i8, ptr [[TMP99]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP102:%.*]] = insertelement <16 x i8> poison, i8 [[TMP101]], i32 0
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK-INTERLEAVED: pred.load.continue:
; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP20]], [[PRED_LOAD_IF]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP103:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP102]], [[PRED_LOAD_IF]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK-INTERLEAVED: pred.load.if1:
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP23]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[TMP24]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-INTERLEAVED-NEXT: [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP109:%.*]] = insertelement <16 x i8> [[TMP103]], i8 [[TMP105]], i32 1
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK-INTERLEAVED: pred.load.continue2:
; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = phi <16 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP25]], [[PRED_LOAD_IF1]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP111:%.*]] = phi <16 x i8> [ [[TMP103]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP109]], [[PRED_LOAD_IF1]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK-INTERLEAVED: pred.load.if3:
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[TMP29]], i32 2
+; CHECK-INTERLEAVED-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-INTERLEAVED-NEXT: [[TMP113:%.*]] = load i8, ptr [[TMP112]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP114:%.*]] = insertelement <16 x i8> [[TMP111]], i8 [[TMP113]], i32 2
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK-INTERLEAVED: pred.load.continue4:
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = phi <16 x i8> [ [[TMP26]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP30]], [[PRED_LOAD_IF3]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP115:%.*]] = phi <16 x i8> [ [[TMP111]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP114]], [[PRED_LOAD_IF3]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK-INTERLEAVED: pred.load.if5:
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[TMP34]], i32 3
+; CHECK-INTERLEAVED-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-INTERLEAVED-NEXT: [[TMP121:%.*]] = load i8, ptr [[TMP119]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP122:%.*]] = insertelement <16 x i8> [[TMP115]], i8 [[TMP121]], i32 3
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK-INTERLEAVED: pred.load.continue6:
; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = phi <16 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], [[PRED_LOAD_IF5]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP123:%.*]] = phi <16 x i8> [ [[TMP115]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP122]], [[PRED_LOAD_IF5]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP37]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
; CHECK-INTERLEAVED: pred.load.if7:
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = insertelement <16 x i8> [[TMP36]], i8 [[TMP39]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-INTERLEAVED-NEXT: [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP129:%.*]] = insertelement <16 x i8> [[TMP123]], i8 [[TMP125]], i32 4
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK-INTERLEAVED: pred.load.continue8:
; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = phi <16 x i8> [ [[TMP36]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP40]], [[PRED_LOAD_IF7]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP131:%.*]] = phi <16 x i8> [ [[TMP123]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP129]], [[PRED_LOAD_IF7]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP42]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK-INTERLEAVED: pred.load.if9:
+; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = insertelement <16 x i8> [[TMP41]], i8 [[TMP44]], i32 5
+; CHECK-INTERLEAVED-NEXT: [[TMP132:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[TMP133:%.*]] = load i8, ptr [[TMP132]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP134:%.*]] = insertelement <16 x i8> [[TMP131]], i8 [[TMP133]], i32 5
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE10]]
; CHECK-INTERLEAVED: pred.load.continue10:
; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = phi <16 x i8> [ [[TMP41]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP45]], [[PRED_LOAD_IF9]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP135:%.*]] = phi <16 x i8> [ [[TMP131]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP134]], [[PRED_LOAD_IF9]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP47]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK-INTERLEAVED: pred.load.if11:
+; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP46]], i8 [[TMP49]], i32 6
+; CHECK-INTERLEAVED-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-INTERLEAVED-NEXT: [[TMP141:%.*]] = load i8, ptr [[TMP139]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP142:%.*]] = insertelement <16 x i8> [[TMP135]], i8 [[TMP141]], i32 6
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK-INTERLEAVED: pred.load.continue12:
; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = phi <16 x i8> [ [[TMP46]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP50]], [[PRED_LOAD_IF11]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP143:%.*]] = phi <16 x i8> [ [[TMP135]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP142]], [[PRED_LOAD_IF11]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP52]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
; CHECK-INTERLEAVED: pred.load.if13:
+; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP54]], i32 7
+; CHECK-INTERLEAVED-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-INTERLEAVED-NEXT: [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP149:%.*]] = insertelement <16 x i8> [[TMP143]], i8 [[TMP145]], i32 7
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK-INTERLEAVED: pred.load.continue14:
; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = phi <16 x i8> [ [[TMP51]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP55]], [[PRED_LOAD_IF13]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP150:%.*]] = phi <16 x i8> [ [[TMP143]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP149]], [[PRED_LOAD_IF13]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK-INTERLEAVED: pred.load.if15:
+; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
; CHECK-INTERLEAVED-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP8]]
; CHECK-INTERLEAVED-NEXT: [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP59]], i32 8
+; CHECK-INTERLEAVED-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-INTERLEAVED-NEXT: [[TMP152:%.*]] = load i8, ptr [[TMP151]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP153:%.*]] = insertelement <16 x i8> [[TMP150]], i8 [[TMP152]], i32 8
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; CHECK-INTERLEAVED: pred.load.continue16:
; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = phi <16 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP60]], [[PRED_LOAD_IF15]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP154:%.*]] = phi <16 x i8> [ [[TMP150]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP153]], [[PRED_LOAD_IF15]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP62:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP62]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
; CHECK-INTERLEAVED: pred.load.if17:
+; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
; CHECK-INTERLEAVED-NEXT: [[TMP64:%.*]] = load i8, ptr [[TMP63]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP65:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP64]], i32 9
+; CHECK-INTERLEAVED-NEXT: [[TMP96:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-INTERLEAVED-NEXT: [[TMP155:%.*]] = load i8, ptr [[TMP96]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP98:%.*]] = insertelement <16 x i8> [[TMP154]], i8 [[TMP155]], i32 9
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE18]]
; CHECK-INTERLEAVED: pred.load.continue18:
; CHECK-INTERLEAVED-NEXT: [[TMP66:%.*]] = phi <16 x i8> [ [[TMP61]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP65]], [[PRED_LOAD_IF17]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP100:%.*]] = phi <16 x i8> [ [[TMP154]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP98]], [[PRED_LOAD_IF17]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP67]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
; CHECK-INTERLEAVED: pred.load.if19:
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = load i8, ptr [[TMP68]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = insertelement <16 x i8> [[TMP66]], i8 [[TMP69]], i32 10
+; CHECK-INTERLEAVED-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-INTERLEAVED-NEXT: [[TMP107:%.*]] = load i8, ptr [[TMP106]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP108:%.*]] = insertelement <16 x i8> [[TMP100]], i8 [[TMP107]], i32 10
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE20]]
; CHECK-INTERLEAVED: pred.load.continue20:
; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = phi <16 x i8> [ [[TMP66]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP70]], [[PRED_LOAD_IF19]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP110:%.*]] = phi <16 x i8> [ [[TMP100]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP108]], [[PRED_LOAD_IF19]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP72]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
; CHECK-INTERLEAVED: pred.load.if21:
+; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = insertelement <16 x i8> [[TMP71]], i8 [[TMP74]], i32 11
+; CHECK-INTERLEAVED-NEXT: [[TMP116:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-INTERLEAVED-NEXT: [[TMP117:%.*]] = load i8, ptr [[TMP116]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP118:%.*]] = insertelement <16 x i8> [[TMP110]], i8 [[TMP117]], i32 11
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE22]]
; CHECK-INTERLEAVED: pred.load.continue22:
; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = phi <16 x i8> [ [[TMP71]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP75]], [[PRED_LOAD_IF21]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP120:%.*]] = phi <16 x i8> [ [[TMP110]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP118]], [[PRED_LOAD_IF21]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP77]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
; CHECK-INTERLEAVED: pred.load.if23:
+; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = insertelement <16 x i8> [[TMP76]], i8 [[TMP79]], i32 12
+; CHECK-INTERLEAVED-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-INTERLEAVED-NEXT: [[TMP127:%.*]] = load i8, ptr [[TMP126]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP128:%.*]] = insertelement <16 x i8> [[TMP120]], i8 [[TMP127]], i32 12
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE24]]
; CHECK-INTERLEAVED: pred.load.continue24:
; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = phi <16 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP80]], [[PRED_LOAD_IF23]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP130:%.*]] = phi <16 x i8> [ [[TMP120]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP128]], [[PRED_LOAD_IF23]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP82]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
; CHECK-INTERLEAVED: pred.load.if25:
+; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]]
; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i8> [[TMP81]], i8 [[TMP84]], i32 13
+; CHECK-INTERLEAVED-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-INTERLEAVED-NEXT: [[TMP137:%.*]] = load i8, ptr [[TMP136]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP138:%.*]] = insertelement <16 x i8> [[TMP130]], i8 [[TMP137]], i32 13
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE26]]
; CHECK-INTERLEAVED: pred.load.continue26:
; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = phi <16 x i8> [ [[TMP81]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP140:%.*]] = phi <16 x i8> [ [[TMP130]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP138]], [[PRED_LOAD_IF25]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
; CHECK-INTERLEAVED: pred.load.if27:
+; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-INTERLEAVED-NEXT: [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
; CHECK-INTERLEAVED-NEXT: [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP90:%.*]] = insertelement <16 x i8> [[TMP86]], i8 [[TMP89]], i32 14
+; CHECK-INTERLEAVED-NEXT: [[TMP146:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
+; CHECK-INTERLEAVED-NEXT: [[TMP147:%.*]] = load i8, ptr [[TMP146]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP148:%.*]] = insertelement <16 x i8> [[TMP140]], i8 [[TMP147]], i32 14
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE28]]
; CHECK-INTERLEAVED: pred.load.continue28:
; CHECK-INTERLEAVED-NEXT: [[TMP91:%.*]] = phi <16 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP90]], [[PRED_LOAD_IF27]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP172:%.*]] = phi <16 x i8> [ [[TMP140]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP148]], [[PRED_LOAD_IF27]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP92:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
+; CHECK-INTERLEAVED-NEXT: br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE62]]
; CHECK-INTERLEAVED: pred.load.if29:
+; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-INTERLEAVED-NEXT: [[TMP93:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP15]]
; CHECK-INTERLEAVED-NEXT: [[TMP94:%.*]] = load i8, ptr [[TMP93]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP95:%.*]] = insertelement <16 x i8> [[TMP91]], i8 [[TMP94]], i32 15
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE30]]
-; CHECK-INTERLEAVED: pred.load.continue30:
-; CHECK-INTERLEAVED-NEXT: [[TMP96:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP97:%.*]] = sext <16 x i8> [[TMP96]] to <16 x i32>
-; CHECK-INTERLEAVED-NEXT: [[TMP98:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP98]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
-; CHECK-INTERLEAVED: pred.load.if31:
-; CHECK-INTERLEAVED-NEXT: [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-INTERLEAVED-NEXT: [[TMP100:%.*]] = load i8, ptr [[TMP99]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP101:%.*]] = insertelement <16 x i8> poison, i8 [[TMP100]], i32 0
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE32]]
-; CHECK-INTERLEAVED: pred.load.continue32:
-; CHECK-INTERLEAVED-NEXT: [[TMP102:%.*]] = phi <16 x i8> [ poison, [[PRED_LOAD_CONTINUE30]] ], [ [[TMP101]], [[PRED_LOAD_IF31]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP103:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP103]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
-; CHECK-INTERLEAVED: pred.load.if33:
-; CHECK-INTERLEAVED-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-INTERLEAVED-NEXT: [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP106:%.*]] = insertelement <16 x i8> [[TMP102]], i8 [[TMP105]], i32 1
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE34]]
-; CHECK-INTERLEAVED: pred.load.continue34:
-; CHECK-INTERLEAVED-NEXT: [[TMP107:%.*]] = phi <16 x i8> [ [[TMP102]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP106]], [[PRED_LOAD_IF33]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP108:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP108]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
-; CHECK-INTERLEAVED: pred.load.if35:
-; CHECK-INTERLEAVED-NEXT: [[TMP109:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-INTERLEAVED-NEXT: [[TMP110:%.*]] = load i8, ptr [[TMP109]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP111:%.*]] = insertelement <16 x i8> [[TMP107]], i8 [[TMP110]], i32 2
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE36]]
-; CHECK-INTERLEAVED: pred.load.continue36:
-; CHECK-INTERLEAVED-NEXT: [[TMP112:%.*]] = phi <16 x i8> [ [[TMP107]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP111]], [[PRED_LOAD_IF35]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP113:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP113]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38:%.*]]
-; CHECK-INTERLEAVED: pred.load.if37:
-; CHECK-INTERLEAVED-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-INTERLEAVED-NEXT: [[TMP115:%.*]] = load i8, ptr [[TMP114]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP116:%.*]] = insertelement <16 x i8> [[TMP112]], i8 [[TMP115]], i32 3
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE38]]
-; CHECK-INTERLEAVED: pred.load.continue38:
-; CHECK-INTERLEAVED-NEXT: [[TMP117:%.*]] = phi <16 x i8> [ [[TMP112]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP116]], [[PRED_LOAD_IF37]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP118:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP118]], label [[PRED_LOAD_IF39:%.*]], label [[PRED_LOAD_CONTINUE40:%.*]]
-; CHECK-INTERLEAVED: pred.load.if39:
-; CHECK-INTERLEAVED-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-INTERLEAVED-NEXT: [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP121:%.*]] = insertelement <16 x i8> [[TMP117]], i8 [[TMP120]], i32 4
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE40]]
-; CHECK-INTERLEAVED: pred.load.continue40:
-; CHECK-INTERLEAVED-NEXT: [[TMP122:%.*]] = phi <16 x i8> [ [[TMP117]], [[PRED_LOAD_CONTINUE38]] ], [ [[TMP121]], [[PRED_LOAD_IF39]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP123:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP123]], label [[PRED_LOAD_IF41:%.*]], label [[PRED_LOAD_CONTINUE42:%.*]]
-; CHECK-INTERLEAVED: pred.load.if41:
-; CHECK-INTERLEAVED-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-INTERLEAVED-NEXT: [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP126:%.*]] = insertelement <16 x i8> [[TMP122]], i8 [[TMP125]], i32 5
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE42]]
-; CHECK-INTERLEAVED: pred.load.continue42:
-; CHECK-INTERLEAVED-NEXT: [[TMP127:%.*]] = phi <16 x i8> [ [[TMP122]], [[PRED_LOAD_CONTINUE40]] ], [ [[TMP126]], [[PRED_LOAD_IF41]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP128:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP128]], label [[PRED_LOAD_IF43:%.*]], label [[PRED_LOAD_CONTINUE44:%.*]]
-; CHECK-INTERLEAVED: pred.load.if43:
-; CHECK-INTERLEAVED-NEXT: [[TMP129:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-INTERLEAVED-NEXT: [[TMP130:%.*]] = load i8, ptr [[TMP129]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP131:%.*]] = insertelement <16 x i8> [[TMP127]], i8 [[TMP130]], i32 6
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE44]]
-; CHECK-INTERLEAVED: pred.load.continue44:
-; CHECK-INTERLEAVED-NEXT: [[TMP132:%.*]] = phi <16 x i8> [ [[TMP127]], [[PRED_LOAD_CONTINUE42]] ], [ [[TMP131]], [[PRED_LOAD_IF43]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP133:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP133]], label [[PRED_LOAD_IF45:%.*]], label [[PRED_LOAD_CONTINUE46:%.*]]
-; CHECK-INTERLEAVED: pred.load.if45:
-; CHECK-INTERLEAVED-NEXT: [[TMP134:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-INTERLEAVED-NEXT: [[TMP135:%.*]] = load i8, ptr [[TMP134]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP136:%.*]] = insertelement <16 x i8> [[TMP132]], i8 [[TMP135]], i32 7
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE46]]
-; CHECK-INTERLEAVED: pred.load.continue46:
-; CHECK-INTERLEAVED-NEXT: [[TMP137:%.*]] = phi <16 x i8> [ [[TMP132]], [[PRED_LOAD_CONTINUE44]] ], [ [[TMP136]], [[PRED_LOAD_IF45]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP138:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP138]], label [[PRED_LOAD_IF47:%.*]], label [[PRED_LOAD_CONTINUE48:%.*]]
-; CHECK-INTERLEAVED: pred.load.if47:
-; CHECK-INTERLEAVED-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-INTERLEAVED-NEXT: [[TMP140:%.*]] = load i8, ptr [[TMP139]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP141:%.*]] = insertelement <16 x i8> [[TMP137]], i8 [[TMP140]], i32 8
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE48]]
-; CHECK-INTERLEAVED: pred.load.continue48:
-; CHECK-INTERLEAVED-NEXT: [[TMP142:%.*]] = phi <16 x i8> [ [[TMP137]], [[PRED_LOAD_CONTINUE46]] ], [ [[TMP141]], [[PRED_LOAD_IF47]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP143:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP143]], label [[PRED_LOAD_IF49:%.*]], label [[PRED_LOAD_CONTINUE50:%.*]]
-; CHECK-INTERLEAVED: pred.load.if49:
-; CHECK-INTERLEAVED-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-INTERLEAVED-NEXT: [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP146:%.*]] = insertelement <16 x i8> [[TMP142]], i8 [[TMP145]], i32 9
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE50]]
-; CHECK-INTERLEAVED: pred.load.continue50:
-; CHECK-INTERLEAVED-NEXT: [[TMP147:%.*]] = phi <16 x i8> [ [[TMP142]], [[PRED_LOAD_CONTINUE48]] ], [ [[TMP146]], [[PRED_LOAD_IF49]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP148:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP148]], label [[PRED_LOAD_IF51:%.*]], label [[PRED_LOAD_CONTINUE52:%.*]]
-; CHECK-INTERLEAVED: pred.load.if51:
-; CHECK-INTERLEAVED-NEXT: [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-INTERLEAVED-NEXT: [[TMP150:%.*]] = load i8, ptr [[TMP149]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP151:%.*]] = insertelement <16 x i8> [[TMP147]], i8 [[TMP150]], i32 10
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE52]]
-; CHECK-INTERLEAVED: pred.load.continue52:
-; CHECK-INTERLEAVED-NEXT: [[TMP152:%.*]] = phi <16 x i8> [ [[TMP147]], [[PRED_LOAD_CONTINUE50]] ], [ [[TMP151]], [[PRED_LOAD_IF51]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP153:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP153]], label [[PRED_LOAD_IF53:%.*]], label [[PRED_LOAD_CONTINUE54:%.*]]
-; CHECK-INTERLEAVED: pred.load.if53:
-; CHECK-INTERLEAVED-NEXT: [[TMP154:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-INTERLEAVED-NEXT: [[TMP155:%.*]] = load i8, ptr [[TMP154]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP156:%.*]] = insertelement <16 x i8> [[TMP152]], i8 [[TMP155]], i32 11
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE54]]
-; CHECK-INTERLEAVED: pred.load.continue54:
-; CHECK-INTERLEAVED-NEXT: [[TMP157:%.*]] = phi <16 x i8> [ [[TMP152]], [[PRED_LOAD_CONTINUE52]] ], [ [[TMP156]], [[PRED_LOAD_IF53]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP158:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP158]], label [[PRED_LOAD_IF55:%.*]], label [[PRED_LOAD_CONTINUE56:%.*]]
-; CHECK-INTERLEAVED: pred.load.if55:
-; CHECK-INTERLEAVED-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-INTERLEAVED-NEXT: [[TMP160:%.*]] = load i8, ptr [[TMP159]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP161:%.*]] = insertelement <16 x i8> [[TMP157]], i8 [[TMP160]], i32 12
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE56]]
-; CHECK-INTERLEAVED: pred.load.continue56:
-; CHECK-INTERLEAVED-NEXT: [[TMP162:%.*]] = phi <16 x i8> [ [[TMP157]], [[PRED_LOAD_CONTINUE54]] ], [ [[TMP161]], [[PRED_LOAD_IF55]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP163:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP163]], label [[PRED_LOAD_IF57:%.*]], label [[PRED_LOAD_CONTINUE58:%.*]]
-; CHECK-INTERLEAVED: pred.load.if57:
-; CHECK-INTERLEAVED-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-INTERLEAVED-NEXT: [[TMP165:%.*]] = load i8, ptr [[TMP164]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP166:%.*]] = insertelement <16 x i8> [[TMP162]], i8 [[TMP165]], i32 13
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE58]]
-; CHECK-INTERLEAVED: pred.load.continue58:
-; CHECK-INTERLEAVED-NEXT: [[TMP167:%.*]] = phi <16 x i8> [ [[TMP162]], [[PRED_LOAD_CONTINUE56]] ], [ [[TMP166]], [[PRED_LOAD_IF57]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP168:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP168]], label [[PRED_LOAD_IF59:%.*]], label [[PRED_LOAD_CONTINUE60:%.*]]
-; CHECK-INTERLEAVED: pred.load.if59:
-; CHECK-INTERLEAVED-NEXT: [[TMP169:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-INTERLEAVED-NEXT: [[TMP170:%.*]] = load i8, ptr [[TMP169]], align 1
-; CHECK-INTERLEAVED-NEXT: [[TMP171:%.*]] = insertelement <16 x i8> [[TMP167]], i8 [[TMP170]], i32 14
-; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE60]]
-; CHECK-INTERLEAVED: pred.load.continue60:
-; CHECK-INTERLEAVED-NEXT: [[TMP172:%.*]] = phi <16 x i8> [ [[TMP167]], [[PRED_LOAD_CONTINUE58]] ], [ [[TMP171]], [[PRED_LOAD_IF59]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP173:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP173]], label [[PRED_LOAD_IF61:%.*]], label [[PRED_LOAD_CONTINUE62]]
-; CHECK-INTERLEAVED: pred.load.if61:
; CHECK-INTERLEAVED-NEXT: [[TMP174:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP15]]
; CHECK-INTERLEAVED-NEXT: [[TMP175:%.*]] = load i8, ptr [[TMP174]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP176:%.*]] = insertelement <16 x i8> [[TMP172]], i8 [[TMP175]], i32 15
; CHECK-INTERLEAVED-NEXT: br label [[PRED_LOAD_CONTINUE62]]
-; CHECK-INTERLEAVED: pred.load.continue62:
-; CHECK-INTERLEAVED-NEXT: [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE60]] ], [ [[TMP176]], [[PRED_LOAD_IF61]] ]
+; CHECK-INTERLEAVED: pred.load.continue30:
+; CHECK-INTERLEAVED-NEXT: [[TMP159:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP176]], [[PRED_LOAD_IF29]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32>
+; CHECK-INTERLEAVED-NEXT: [[TMP97:%.*]] = sext <16 x i8> [[TMP159]] to <16 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
; CHECK-INTERLEAVED-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
@@ -1663,313 +1503,233 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE62:%.*]] ]
; CHECK-MAXBW-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE62]] ]
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[PRED_LOAD_CONTINUE62]] ]
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = icmp ule <16 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
; CHECK-MAXBW-NEXT: br i1 [[TMP17]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK-MAXBW: pred.load.if:
+; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = insertelement <16 x i8> poison, i8 [[TMP19]], i32 0
+; CHECK-MAXBW-NEXT: [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-MAXBW-NEXT: [[TMP101:%.*]] = load i8, ptr [[TMP99]], align 1
+; CHECK-MAXBW-NEXT: [[TMP102:%.*]] = insertelement <16 x i8> poison, i8 [[TMP101]], i32 0
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK-MAXBW: pred.load.continue:
; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP20]], [[PRED_LOAD_IF]] ]
+; CHECK-MAXBW-NEXT: [[TMP103:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP102]], [[PRED_LOAD_IF]] ]
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK-MAXBW: pred.load.if1:
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP23]], align 1
; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[TMP24]], i32 1
+; CHECK-MAXBW-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-MAXBW-NEXT: [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
+; CHECK-MAXBW-NEXT: [[TMP109:%.*]] = insertelement <16 x i8> [[TMP103]], i8 [[TMP105]], i32 1
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK-MAXBW: pred.load.continue2:
; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = phi <16 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP25]], [[PRED_LOAD_IF1]] ]
+; CHECK-MAXBW-NEXT: [[TMP111:%.*]] = phi <16 x i8> [ [[TMP103]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP109]], [[PRED_LOAD_IF1]] ]
; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
; CHECK-MAXBW-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK-MAXBW: pred.load.if3:
+; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[TMP29]], i32 2
+; CHECK-MAXBW-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-MAXBW-NEXT: [[TMP113:%.*]] = load i8, ptr [[TMP112]], align 1
+; CHECK-MAXBW-NEXT: [[TMP114:%.*]] = insertelement <16 x i8> [[TMP111]], i8 [[TMP113]], i32 2
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK-MAXBW: pred.load.continue4:
; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = phi <16 x i8> [ [[TMP26]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP30]], [[PRED_LOAD_IF3]] ]
+; CHECK-MAXBW-NEXT: [[TMP115:%.*]] = phi <16 x i8> [ [[TMP111]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP114]], [[PRED_LOAD_IF3]] ]
; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
; CHECK-MAXBW-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK-MAXBW: pred.load.if5:
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
; CHECK-MAXBW-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[TMP34]], i32 3
+; CHECK-MAXBW-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-MAXBW-NEXT: [[TMP121:%.*]] = load i8, ptr [[TMP119]], align 1
+; CHECK-MAXBW-NEXT: [[TMP122:%.*]] = insertelement <16 x i8> [[TMP115]], i8 [[TMP121]], i32 3
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK-MAXBW: pred.load.continue6:
; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = phi <16 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], [[PRED_LOAD_IF5]] ]
+; CHECK-MAXBW-NEXT: [[TMP123:%.*]] = phi <16 x i8> [ [[TMP115]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP122]], [[PRED_LOAD_IF5]] ]
; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
; CHECK-MAXBW-NEXT: br i1 [[TMP37]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
; CHECK-MAXBW: pred.load.if7:
+; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = insertelement <16 x i8> [[TMP36]], i8 [[TMP39]], i32 4
+; CHECK-MAXBW-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-MAXBW-NEXT: [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
+; CHECK-MAXBW-NEXT: [[TMP129:%.*]] = insertelement <16 x i8> [[TMP123]], i8 [[TMP125]], i32 4
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK-MAXBW: pred.load.continue8:
; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = phi <16 x i8> [ [[TMP36]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP40]], [[PRED_LOAD_IF7]] ]
+; CHECK-MAXBW-NEXT: [[TMP131:%.*]] = phi <16 x i8> [ [[TMP123]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP129]], [[PRED_LOAD_IF7]] ]
; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
; CHECK-MAXBW-NEXT: br i1 [[TMP42]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK-MAXBW: pred.load.if9:
+; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
; CHECK-MAXBW-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = insertelement <16 x i8> [[TMP41]], i8 [[TMP44]], i32 5
+; CHECK-MAXBW-NEXT: [[TMP132:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-MAXBW-NEXT: [[TMP133:%.*]] = load i8, ptr [[TMP132]], align 1
+; CHECK-MAXBW-NEXT: [[TMP134:%.*]] = insertelement <16 x i8> [[TMP131]], i8 [[TMP133]], i32 5
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE10]]
; CHECK-MAXBW: pred.load.continue10:
; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = phi <16 x i8> [ [[TMP41]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP45]], [[PRED_LOAD_IF9]] ]
+; CHECK-MAXBW-NEXT: [[TMP135:%.*]] = phi <16 x i8> [ [[TMP131]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP134]], [[PRED_LOAD_IF9]] ]
; CHECK-MAXBW-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
; CHECK-MAXBW-NEXT: br i1 [[TMP47]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK-MAXBW: pred.load.if11:
+; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-MAXBW-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
; CHECK-MAXBW-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
; CHECK-MAXBW-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP46]], i8 [[TMP49]], i32 6
+; CHECK-MAXBW-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-MAXBW-NEXT: [[TMP141:%.*]] = load i8, ptr [[TMP139]], align 1
+; CHECK-MAXBW-NEXT: [[TMP142:%.*]] = insertelement <16 x i8> [[TMP135]], i8 [[TMP141]], i32 6
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK-MAXBW: pred.load.continue12:
; CHECK-MAXBW-NEXT: [[TMP51:%.*]] = phi <16 x i8> [ [[TMP46]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP50]], [[PRED_LOAD_IF11]] ]
+; CHECK-MAXBW-NEXT: [[TMP143:%.*]] = phi <16 x i8> [ [[TMP135]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP142]], [[PRED_LOAD_IF11]] ]
; CHECK-MAXBW-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
; CHECK-MAXBW-NEXT: br i1 [[TMP52]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
; CHECK-MAXBW: pred.load.if13:
+; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
; CHECK-MAXBW-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
; CHECK-MAXBW-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
; CHECK-MAXBW-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP54]], i32 7
+; CHECK-MAXBW-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-MAXBW-NEXT: [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
+; CHECK-MAXBW-NEXT: [[TMP149:%.*]] = insertelement <16 x i8> [[TMP143]], i8 [[TMP145]], i32 7
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK-MAXBW: pred.load.continue14:
; CHECK-MAXBW-NEXT: [[TMP56:%.*]] = phi <16 x i8> [ [[TMP51]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP55]], [[PRED_LOAD_IF13]] ]
+; CHECK-MAXBW-NEXT: [[TMP150:%.*]] = phi <16 x i8> [ [[TMP143]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP149]], [[PRED_LOAD_IF13]] ]
; CHECK-MAXBW-NEXT: [[TMP57:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
; CHECK-MAXBW-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK-MAXBW: pred.load.if15:
+; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
; CHECK-MAXBW-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP8]]
; CHECK-MAXBW-NEXT: [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
; CHECK-MAXBW-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP59]], i32 8
+; CHECK-MAXBW-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-MAXBW-NEXT: [[TMP152:%.*]] = load i8, ptr [[TMP151]], align 1
+; CHECK-MAXBW-NEXT: [[TMP153:%.*]] = insertelement <16 x i8> [[TMP150]], i8 [[TMP152]], i32 8
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; CHECK-MAXBW: pred.load.continue16:
; CHECK-MAXBW-NEXT: [[TMP61:%.*]] = phi <16 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP60]], [[PRED_LOAD_IF15]] ]
+; CHECK-MAXBW-NEXT: [[TMP154:%.*]] = phi <16 x i8> [ [[TMP150]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP153]], [[PRED_LOAD_IF15]] ]
; CHECK-MAXBW-NEXT: [[TMP62:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
; CHECK-MAXBW-NEXT: br i1 [[TMP62]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
; CHECK-MAXBW: pred.load.if17:
+; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
; CHECK-MAXBW-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
; CHECK-MAXBW-NEXT: [[TMP64:%.*]] = load i8, ptr [[TMP63]], align 1
; CHECK-MAXBW-NEXT: [[TMP65:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP64]], i32 9
+; CHECK-MAXBW-NEXT: [[TMP96:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-MAXBW-NEXT: [[TMP155:%.*]] = load i8, ptr [[TMP96]], align 1
+; CHECK-MAXBW-NEXT: [[TMP98:%.*]] = insertelement <16 x i8> [[TMP154]], i8 [[TMP155]], i32 9
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE18]]
; CHECK-MAXBW: pred.load.continue18:
; CHECK-MAXBW-NEXT: [[TMP66:%.*]] = phi <16 x i8> [ [[TMP61]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP65]], [[PRED_LOAD_IF17]] ]
+; CHECK-MAXBW-NEXT: [[TMP100:%.*]] = phi <16 x i8> [ [[TMP154]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP98]], [[PRED_LOAD_IF17]] ]
; CHECK-MAXBW-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
; CHECK-MAXBW-NEXT: br i1 [[TMP67]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
; CHECK-MAXBW: pred.load.if19:
+; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
; CHECK-MAXBW-NEXT: [[TMP68:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
; CHECK-MAXBW-NEXT: [[TMP69:%.*]] = load i8, ptr [[TMP68]], align 1
; CHECK-MAXBW-NEXT: [[TMP70:%.*]] = insertelement <16 x i8> [[TMP66]], i8 [[TMP69]], i32 10
+; CHECK-MAXBW-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-MAXBW-NEXT: [[TMP107:%.*]] = load i8, ptr [[TMP106]], align 1
+; CHECK-MAXBW-NEXT: [[TMP108:%.*]] = insertelement <16 x i8> [[TMP100]], i8 [[TMP107]], i32 10
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE20]]
; CHECK-MAXBW: pred.load.continue20:
; CHECK-MAXBW-NEXT: [[TMP71:%.*]] = phi <16 x i8> [ [[TMP66]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP70]], [[PRED_LOAD_IF19]] ]
+; CHECK-MAXBW-NEXT: [[TMP110:%.*]] = phi <16 x i8> [ [[TMP100]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP108]], [[PRED_LOAD_IF19]] ]
; CHECK-MAXBW-NEXT: [[TMP72:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
; CHECK-MAXBW-NEXT: br i1 [[TMP72]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
; CHECK-MAXBW: pred.load.if21:
+; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
; CHECK-MAXBW-NEXT: [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
; CHECK-MAXBW-NEXT: [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
; CHECK-MAXBW-NEXT: [[TMP75:%.*]] = insertelement <16 x i8> [[TMP71]], i8 [[TMP74]], i32 11
+; CHECK-MAXBW-NEXT: [[TMP116:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-MAXBW-NEXT: [[TMP117:%.*]] = load i8, ptr [[TMP116]], align 1
+; CHECK-MAXBW-NEXT: [[TMP118:%.*]] = insertelement <16 x i8> [[TMP110]], i8 [[TMP117]], i32 11
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE22]]
; CHECK-MAXBW: pred.load.continue22:
; CHECK-MAXBW-NEXT: [[TMP76:%.*]] = phi <16 x i8> [ [[TMP71]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP75]], [[PRED_LOAD_IF21]] ]
+; CHECK-MAXBW-NEXT: [[TMP120:%.*]] = phi <16 x i8> [ [[TMP110]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP118]], [[PRED_LOAD_IF21]] ]
; CHECK-MAXBW-NEXT: [[TMP77:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
; CHECK-MAXBW-NEXT: br i1 [[TMP77]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
; CHECK-MAXBW: pred.load.if23:
+; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
; CHECK-MAXBW-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; CHECK-MAXBW-NEXT: [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1
; CHECK-MAXBW-NEXT: [[TMP80:%.*]] = insertelement <16 x i8> [[TMP76]], i8 [[TMP79]], i32 12
+; CHECK-MAXBW-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-MAXBW-NEXT: [[TMP127:%.*]] = load i8, ptr [[TMP126]], align 1
+; CHECK-MAXBW-NEXT: [[TMP128:%.*]] = insertelement <16 x i8> [[TMP120]], i8 [[TMP127]], i32 12
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE24]]
; CHECK-MAXBW: pred.load.continue24:
; CHECK-MAXBW-NEXT: [[TMP81:%.*]] = phi <16 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP80]], [[PRED_LOAD_IF23]] ]
+; CHECK-MAXBW-NEXT: [[TMP130:%.*]] = phi <16 x i8> [ [[TMP120]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP128]], [[PRED_LOAD_IF23]] ]
; CHECK-MAXBW-NEXT: [[TMP82:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
; CHECK-MAXBW-NEXT: br i1 [[TMP82]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
; CHECK-MAXBW: pred.load.if25:
+; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-MAXBW-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]]
; CHECK-MAXBW-NEXT: [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
; CHECK-MAXBW-NEXT: [[TMP85:%.*]] = insertelement <16 x i8> [[TMP81]], i8 [[TMP84]], i32 13
+; CHECK-MAXBW-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-MAXBW-NEXT: [[TMP137:%.*]] = load i8, ptr [[TMP136]], align 1
+; CHECK-MAXBW-NEXT: [[TMP138:%.*]] = insertelement <16 x i8> [[TMP130]], i8 [[TMP137]], i32 13
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE26]]
; CHECK-MAXBW: pred.load.continue26:
; CHECK-MAXBW-NEXT: [[TMP86:%.*]] = phi <16 x i8> [ [[TMP81]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
+; CHECK-MAXBW-NEXT: [[TMP140:%.*]] = phi <16 x i8> [ [[TMP130]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP138]], [[PRED_LOAD_IF25]] ]
; CHECK-MAXBW-NEXT: [[TMP87:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
; CHECK-MAXBW-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
; CHECK-MAXBW: pred.load.if27:
+; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-MAXBW-NEXT: [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
; CHECK-MAXBW-NEXT: [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
; CHECK-MAXBW-NEXT: [[TMP90:%.*]] = insertelement <16 x i8> [[TMP86]], i8 [[TMP89]], i32 14
+; CHECK-MAXBW-NEXT: [[TMP146:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
+; CHECK-MAXBW-NEXT: [[TMP147:%.*]] = load i8, ptr [[TMP146]], align 1
+; CHECK-MAXBW-NEXT: [[TMP148:%.*]] = insertelement <16 x i8> [[TMP140]], i8 [[TMP147]], i32 14
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE28]]
; CHECK-MAXBW: pred.load.continue28:
; CHECK-MAXBW-NEXT: [[TMP91:%.*]] = phi <16 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP90]], [[PRED_LOAD_IF27]] ]
+; CHECK-MAXBW-NEXT: [[TMP172:%.*]] = phi <16 x i8> [ [[TMP140]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP148]], [[PRED_LOAD_IF27]] ]
; CHECK-MAXBW-NEXT: [[TMP92:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
-; CHECK-MAXBW-NEXT: br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
+; CHECK-MAXBW-NEXT: br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE62]]
; CHECK-MAXBW: pred.load.if29:
+; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-MAXBW-NEXT: [[TMP93:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP15]]
; CHECK-MAXBW-NEXT: [[TMP94:%.*]] = load i8, ptr [[TMP93]], align 1
; CHECK-MAXBW-NEXT: [[TMP95:%.*]] = insertelement <16 x i8> [[TMP91]], i8 [[TMP94]], i32 15
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE30]]
-; CHECK-MAXBW: pred.load.continue30:
-; CHECK-MAXBW-NEXT: [[TMP96:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
-; CHECK-MAXBW-NEXT: [[TMP97:%.*]] = sext <16 x i8> [[TMP96]] to <16 x i32>
-; CHECK-MAXBW-NEXT: [[TMP98:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
-; CHECK-MAXBW-NEXT: br i1 [[TMP98]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
-; CHECK-MAXBW: pred.load.if31:
-; CHECK-MAXBW-NEXT: [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-MAXBW-NEXT: [[TMP100:%.*]] = load i8, ptr [[TMP99]], align 1
-; CHECK-MAXBW-NEXT: [[TMP101:%.*]] = insertelement <16 x i8> poison, i8 [[TMP100]], i32 0
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE32]]
-; CHECK-MAXBW: pred.load.continue32:
-; CHECK-MAXBW-NEXT: [[TMP102:%.*]] = phi <16 x i8> [ poison, [[PRED_LOAD_CONTINUE30]] ], [ [[TMP101]], [[PRED_LOAD_IF31]] ]
-; CHECK-MAXBW-NEXT: [[TMP103:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
-; CHECK-MAXBW-NEXT: br i1 [[TMP103]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
-; CHECK-MAXBW: pred.load.if33:
-; CHECK-MAXBW-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-MAXBW-NEXT: [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
-; CHECK-MAXBW-NEXT: [[TMP106:%.*]] = insertelement <16 x i8> [[TMP102]], i8 [[TMP105]], i32 1
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE34]]
-; CHECK-MAXBW: pred.load.continue34:
-; CHECK-MAXBW-NEXT: [[TMP107:%.*]] = phi <16 x i8> [ [[TMP102]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP106]], [[PRED_LOAD_IF33]] ]
-; CHECK-MAXBW-NEXT: [[TMP108:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
-; CHECK-MAXBW-NEXT: br i1 [[TMP108]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
-; CHECK-MAXBW: pred.load.if35:
-; CHECK-MAXBW-NEXT: [[TMP109:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-MAXBW-NEXT: [[TMP110:%.*]] = load i8, ptr [[TMP109]], align 1
-; CHECK-MAXBW-NEXT: [[TMP111:%.*]] = insertelement <16 x i8> [[TMP107]], i8 [[TMP110]], i32 2
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE36]]
-; CHECK-MAXBW: pred.load.continue36:
-; CHECK-MAXBW-NEXT: [[TMP112:%.*]] = phi <16 x i8> [ [[TMP107]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP111]], [[PRED_LOAD_IF35]] ]
-; CHECK-MAXBW-NEXT: [[TMP113:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
-; CHECK-MAXBW-NEXT: br i1 [[TMP113]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38:%.*]]
-; CHECK-MAXBW: pred.load.if37:
-; CHECK-MAXBW-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-MAXBW-NEXT: [[TMP115:%.*]] = load i8, ptr [[TMP114]], align 1
-; CHECK-MAXBW-NEXT: [[TMP116:%.*]] = insertelement <16 x i8> [[TMP112]], i8 [[TMP115]], i32 3
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE38]]
-; CHECK-MAXBW: pred.load.continue38:
-; CHECK-MAXBW-NEXT: [[TMP117:%.*]] = phi <16 x i8> [ [[TMP112]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP116]], [[PRED_LOAD_IF37]] ]
-; CHECK-MAXBW-NEXT: [[TMP118:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
-; CHECK-MAXBW-NEXT: br i1 [[TMP118]], label [[PRED_LOAD_IF39:%.*]], label [[PRED_LOAD_CONTINUE40:%.*]]
-; CHECK-MAXBW: pred.load.if39:
-; CHECK-MAXBW-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-MAXBW-NEXT: [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1
-; CHECK-MAXBW-NEXT: [[TMP121:%.*]] = insertelement <16 x i8> [[TMP117]], i8 [[TMP120]], i32 4
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE40]]
-; CHECK-MAXBW: pred.load.continue40:
-; CHECK-MAXBW-NEXT: [[TMP122:%.*]] = phi <16 x i8> [ [[TMP117]], [[PRED_LOAD_CONTINUE38]] ], [ [[TMP121]], [[PRED_LOAD_IF39]] ]
-; CHECK-MAXBW-NEXT: [[TMP123:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
-; CHECK-MAXBW-NEXT: br i1 [[TMP123]], label [[PRED_LOAD_IF41:%.*]], label [[PRED_LOAD_CONTINUE42:%.*]]
-; CHECK-MAXBW: pred.load.if41:
-; CHECK-MAXBW-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-MAXBW-NEXT: [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
-; CHECK-MAXBW-NEXT: [[TMP126:%.*]] = insertelement <16 x i8> [[TMP122]], i8 [[TMP125]], i32 5
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE42]]
-; CHECK-MAXBW: pred.load.continue42:
-; CHECK-MAXBW-NEXT: [[TMP127:%.*]] = phi <16 x i8> [ [[TMP122]], [[PRED_LOAD_CONTINUE40]] ], [ [[TMP126]], [[PRED_LOAD_IF41]] ]
-; CHECK-MAXBW-NEXT: [[TMP128:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
-; CHECK-MAXBW-NEXT: br i1 [[TMP128]], label [[PRED_LOAD_IF43:%.*]], label [[PRED_LOAD_CONTINUE44:%.*]]
-; CHECK-MAXBW: pred.load.if43:
-; CHECK-MAXBW-NEXT: [[TMP129:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-MAXBW-NEXT: [[TMP130:%.*]] = load i8, ptr [[TMP129]], align 1
-; CHECK-MAXBW-NEXT: [[TMP131:%.*]] = insertelement <16 x i8> [[TMP127]], i8 [[TMP130]], i32 6
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE44]]
-; CHECK-MAXBW: pred.load.continue44:
-; CHECK-MAXBW-NEXT: [[TMP132:%.*]] = phi <16 x i8> [ [[TMP127]], [[PRED_LOAD_CONTINUE42]] ], [ [[TMP131]], [[PRED_LOAD_IF43]] ]
-; CHECK-MAXBW-NEXT: [[TMP133:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
-; CHECK-MAXBW-NEXT: br i1 [[TMP133]], label [[PRED_LOAD_IF45:%.*]], label [[PRED_LOAD_CONTINUE46:%.*]]
-; CHECK-MAXBW: pred.load.if45:
-; CHECK-MAXBW-NEXT: [[TMP134:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-MAXBW-NEXT: [[TMP135:%.*]] = load i8, ptr [[TMP134]], align 1
-; CHECK-MAXBW-NEXT: [[TMP136:%.*]] = insertelement <16 x i8> [[TMP132]], i8 [[TMP135]], i32 7
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE46]]
-; CHECK-MAXBW: pred.load.continue46:
-; CHECK-MAXBW-NEXT: [[TMP137:%.*]] = phi <16 x i8> [ [[TMP132]], [[PRED_LOAD_CONTINUE44]] ], [ [[TMP136]], [[PRED_LOAD_IF45]] ]
-; CHECK-MAXBW-NEXT: [[TMP138:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
-; CHECK-MAXBW-NEXT: br i1 [[TMP138]], label [[PRED_LOAD_IF47:%.*]], label [[PRED_LOAD_CONTINUE48:%.*]]
-; CHECK-MAXBW: pred.load.if47:
-; CHECK-MAXBW-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-MAXBW-NEXT: [[TMP140:%.*]] = load i8, ptr [[TMP139]], align 1
-; CHECK-MAXBW-NEXT: [[TMP141:%.*]] = insertelement <16 x i8> [[TMP137]], i8 [[TMP140]], i32 8
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE48]]
-; CHECK-MAXBW: pred.load.continue48:
-; CHECK-MAXBW-NEXT: [[TMP142:%.*]] = phi <16 x i8> [ [[TMP137]], [[PRED_LOAD_CONTINUE46]] ], [ [[TMP141]], [[PRED_LOAD_IF47]] ]
-; CHECK-MAXBW-NEXT: [[TMP143:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
-; CHECK-MAXBW-NEXT: br i1 [[TMP143]], label [[PRED_LOAD_IF49:%.*]], label [[PRED_LOAD_CONTINUE50:%.*]]
-; CHECK-MAXBW: pred.load.if49:
-; CHECK-MAXBW-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-MAXBW-NEXT: [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
-; CHECK-MAXBW-NEXT: [[TMP146:%.*]] = insertelement <16 x i8> [[TMP142]], i8 [[TMP145]], i32 9
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE50]]
-; CHECK-MAXBW: pred.load.continue50:
-; CHECK-MAXBW-NEXT: [[TMP147:%.*]] = phi <16 x i8> [ [[TMP142]], [[PRED_LOAD_CONTINUE48]] ], [ [[TMP146]], [[PRED_LOAD_IF49]] ]
-; CHECK-MAXBW-NEXT: [[TMP148:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
-; CHECK-MAXBW-NEXT: br i1 [[TMP148]], label [[PRED_LOAD_IF51:%.*]], label [[PRED_LOAD_CONTINUE52:%.*]]
-; CHECK-MAXBW: pred.load.if51:
-; CHECK-MAXBW-NEXT: [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-MAXBW-NEXT: [[TMP150:%.*]] = load i8, ptr [[TMP149]], align 1
-; CHECK-MAXBW-NEXT: [[TMP151:%.*]] = insertelement <16 x i8> [[TMP147]], i8 [[TMP150]], i32 10
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE52]]
-; CHECK-MAXBW: pred.load.continue52:
-; CHECK-MAXBW-NEXT: [[TMP152:%.*]] = phi <16 x i8> [ [[TMP147]], [[PRED_LOAD_CONTINUE50]] ], [ [[TMP151]], [[PRED_LOAD_IF51]] ]
-; CHECK-MAXBW-NEXT: [[TMP153:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
-; CHECK-MAXBW-NEXT: br i1 [[TMP153]], label [[PRED_LOAD_IF53:%.*]], label [[PRED_LOAD_CONTINUE54:%.*]]
-; CHECK-MAXBW: pred.load.if53:
-; CHECK-MAXBW-NEXT: [[TMP154:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-MAXBW-NEXT: [[TMP155:%.*]] = load i8, ptr [[TMP154]], align 1
-; CHECK-MAXBW-NEXT: [[TMP156:%.*]] = insertelement <16 x i8> [[TMP152]], i8 [[TMP155]], i32 11
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE54]]
-; CHECK-MAXBW: pred.load.continue54:
-; CHECK-MAXBW-NEXT: [[TMP157:%.*]] = phi <16 x i8> [ [[TMP152]], [[PRED_LOAD_CONTINUE52]] ], [ [[TMP156]], [[PRED_LOAD_IF53]] ]
-; CHECK-MAXBW-NEXT: [[TMP158:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
-; CHECK-MAXBW-NEXT: br i1 [[TMP158]], label [[PRED_LOAD_IF55:%.*]], label [[PRED_LOAD_CONTINUE56:%.*]]
-; CHECK-MAXBW: pred.load.if55:
-; CHECK-MAXBW-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-MAXBW-NEXT: [[TMP160:%.*]] = load i8, ptr [[TMP159]], align 1
-; CHECK-MAXBW-NEXT: [[TMP161:%.*]] = insertelement <16 x i8> [[TMP157]], i8 [[TMP160]], i32 12
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE56]]
-; CHECK-MAXBW: pred.load.continue56:
-; CHECK-MAXBW-NEXT: [[TMP162:%.*]] = phi <16 x i8> [ [[TMP157]], [[PRED_LOAD_CONTINUE54]] ], [ [[TMP161]], [[PRED_LOAD_IF55]] ]
-; CHECK-MAXBW-NEXT: [[TMP163:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
-; CHECK-MAXBW-NEXT: br i1 [[TMP163]], label [[PRED_LOAD_IF57:%.*]], label [[PRED_LOAD_CONTINUE58:%.*]]
-; CHECK-MAXBW: pred.load.if57:
-; CHECK-MAXBW-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-MAXBW-NEXT: [[TMP165:%.*]] = load i8, ptr [[TMP164]], align 1
-; CHECK-MAXBW-NEXT: [[TMP166:%.*]] = insertelement <16 x i8> [[TMP162]], i8 [[TMP165]], i32 13
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE58]]
-; CHECK-MAXBW: pred.load.continue58:
-; CHECK-MAXBW-NEXT: [[TMP167:%.*]] = phi <16 x i8> [ [[TMP162]], [[PRED_LOAD_CONTINUE56]] ], [ [[TMP166]], [[PRED_LOAD_IF57]] ]
-; CHECK-MAXBW-NEXT: [[TMP168:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
-; CHECK-MAXBW-NEXT: br i1 [[TMP168]], label [[PRED_LOAD_IF59:%.*]], label [[PRED_LOAD_CONTINUE60:%.*]]
-; CHECK-MAXBW: pred.load.if59:
-; CHECK-MAXBW-NEXT: [[TMP169:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-MAXBW-NEXT: [[TMP170:%.*]] = load i8, ptr [[TMP169]], align 1
-; CHECK-MAXBW-NEXT: [[TMP171:%.*]] = insertelement <16 x i8> [[TMP167]], i8 [[TMP170]], i32 14
-; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE60]]
-; CHECK-MAXBW: pred.load.continue60:
-; CHECK-MAXBW-NEXT: [[TMP172:%.*]] = phi <16 x i8> [ [[TMP167]], [[PRED_LOAD_CONTINUE58]] ], [ [[TMP171]], [[PRED_LOAD_IF59]] ]
-; CHECK-MAXBW-NEXT: [[TMP173:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
-; CHECK-MAXBW-NEXT: br i1 [[TMP173]], label [[PRED_LOAD_IF61:%.*]], label [[PRED_LOAD_CONTINUE62]]
-; CHECK-MAXBW: pred.load.if61:
; CHECK-MAXBW-NEXT: [[TMP174:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP15]]
; CHECK-MAXBW-NEXT: [[TMP175:%.*]] = load i8, ptr [[TMP174]], align 1
; CHECK-MAXBW-NEXT: [[TMP176:%.*]] = insertelement <16 x i8> [[TMP172]], i8 [[TMP175]], i32 15
; CHECK-MAXBW-NEXT: br label [[PRED_LOAD_CONTINUE62]]
-; CHECK-MAXBW: pred.load.continue62:
-; CHECK-MAXBW-NEXT: [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE60]] ], [ [[TMP176]], [[PRED_LOAD_IF61]] ]
+; CHECK-MAXBW: pred.load.continue30:
+; CHECK-MAXBW-NEXT: [[TMP159:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
+; CHECK-MAXBW-NEXT: [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP176]], [[PRED_LOAD_IF29]] ]
; CHECK-MAXBW-NEXT: [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32>
+; CHECK-MAXBW-NEXT: [[TMP97:%.*]] = sext <16 x i8> [[TMP159]] to <16 x i32>
; CHECK-MAXBW-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
; CHECK-MAXBW-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index 737e0b8294aa4..f1601dc933c85 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -1389,10 +1389,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; CHECK-MAXBW-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP11]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
-; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = sext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i32>
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
; CHECK-MAXBW-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = sext <vscale x 16 x i8> [[WIDE_MASKED_LOAD1]] to <vscale x 16 x i32>
+; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = sext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i32>
; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw <vscale x 16 x i32> [[TMP16]], [[TMP13]]
; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i32> [[TMP17]], <vscale x 16 x i32> zeroinitializer
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP18]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
index 49f663f5703b6..fbcb2f33a5a48 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
@@ -91,7 +91,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
-; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul>
+; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = ir<%accum> + reduce.add (ir<%mul>)
; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16>
; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
; CHECK-NEXT: Successor(s): middle.block, vector.body
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index db64c755d005f..62393df4e6aef 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1219,7 +1219,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
- CondOp, VecOp, false);
+ CondOp, VecOp, false, 1);
EXPECT_FALSE(Recipe.mayHaveSideEffects());
EXPECT_FALSE(Recipe.mayReadFromMemory());
EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1234,7 +1234,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
- CondOp, VecOp, false);
+ CondOp, VecOp, false, 1);
VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 4));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_FALSE(EVLRecipe.mayHaveSideEffects());
@@ -1591,7 +1591,7 @@ TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) {
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
- CondOp, VecOp, false);
+ CondOp, VecOp, false, 1);
EXPECT_TRUE(isa<VPUser>(&Recipe));
VPRecipeBase *BaseR = &Recipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
@@ -1606,7 +1606,7 @@ TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) {
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
- CondOp, VecOp, false);
+ CondOp, VecOp, false, 1);
VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_TRUE(isa<VPUser>(&EVLRecipe));
>From 901297f77829fb758221a76010de8409cc71a610 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <samuel.tebbs at arm.com>
Date: Mon, 20 Oct 2025 14:51:01 +0100
Subject: [PATCH 2/2] Remove outer-loop comment and re-design getVFScaleFactor
interface
---
llvm/lib/Transforms/Vectorize/VPlan.h | 8 +++++---
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 2 +-
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 668c7a5750398..31f59bc02eb68 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2678,7 +2678,6 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
bool IsConditional = false;
/// The scaling factor, relative to the VF, that this recipe's output is
/// divided by.
- /// For outer-loop reductions this is equal to 1.
/// For in-loop reductions this is equal to 0, to specify that this is equal
/// to the VF (which may not be known yet).
/// For partial-reductions this is equal to another scalar value.
@@ -2763,8 +2762,11 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
VPValue *getCondOp() const {
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
}
- /// Get the factor that the VF of this recipe's output should be scaled by.
- unsigned getVFScaleFactor() const { return VFScaleFactor; }
+ /// Get the factor that the VF of this recipe's output should be scaled by, or
+ /// null if it isn't scaled.
+ std::optional<unsigned> getVFScaleFactor() const {
+ return VFScaleFactor > 1 ? std::make_optional(VFScaleFactor) : std::nullopt;
+ }
};
/// A recipe to represent inloop reduction operations with vector-predication
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 151b6f663b176..98d1992a23454 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -146,7 +146,7 @@ unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
return RR->getVFScaleFactor();
if (auto *RR = dyn_cast<VPReductionRecipe>(R))
- return RR->getVFScaleFactor();
+ return RR->getVFScaleFactor().value_or(1);
assert(
(!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
VPInstruction::ReductionStartVector) &&
More information about the llvm-branch-commits
mailing list