[llvm] 1858532 - [VPlan] Handle predicated UDiv in VPReplicateRecipe::computeCost.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 15 13:47:19 PDT 2025
Author: Florian Hahn
Date: 2025-09-15T21:46:50+01:00
New Revision: 1858532c48cf5d93aa82966110fe1cada6ab6ba8
URL: https://github.com/llvm/llvm-project/commit/1858532c48cf5d93aa82966110fe1cada6ab6ba8
DIFF: https://github.com/llvm/llvm-project/commit/1858532c48cf5d93aa82966110fe1cada6ab6ba8.diff
LOG: [VPlan] Handle predicated UDiv in VPReplicateRecipe::computeCost.
Account for predicated UDiv, SDiv, URem and SRem in
VPReplicateRecipe::computeCost: compute the cost of the extra phis and
apply getPredBlockCostDivisor.
Fixes https://github.com/llvm/llvm-project/issues/158660
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 723363fba5724..2844b8348027b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3163,9 +3163,22 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
if (isSingleScalar())
return ScalarCost;
- return ScalarCost * VF.getFixedValue() +
- Ctx.getScalarizationOverhead(Ctx.Types.inferScalarType(this),
- to_vector(operands()), VF);
+ ScalarCost = ScalarCost * VF.getFixedValue() +
+ Ctx.getScalarizationOverhead(Ctx.Types.inferScalarType(this),
+ to_vector(operands()), VF);
+ // If the recipe is not predicated (i.e. not in a replicate region), return
+ // the scalar cost. Otherwise handle predicated cost.
+ if (!getParent()->getParent()->isReplicator())
+ return ScalarCost;
+
+ // Account for the phi nodes that we will create.
+ ScalarCost += VF.getFixedValue() *
+ Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
+ // Scale the cost by the probability of executing the predicated blocks.
+ // This assumes the predicated block for each vector lane is equally
+ // likely.
+ ScalarCost /= getPredBlockCostDivisor(Ctx.CostKind);
+ return ScalarCost;
}
case Instruction::Load:
case Instruction::Store: {
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
index 6c1b2568d872a..a3623ddddeef0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
@@ -892,6 +892,458 @@ exit:
ret void
}
+; Test case for https://github.com/llvm/llvm-project/issues/158660.
+define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 {
+; CHECK-LABEL: @test_predicated_udiv(
+; CHECK-NEXT: iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i1> poison, i1 [[C:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i1> [[BROADCAST_SPLATINSERT]], <32 x i1> poison, <32 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <32 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE62:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE62]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.usub.sat.v32i32(<32 x i32> [[VEC_IND]], <32 x i32> splat (i32 1))
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
+; CHECK: pred.udiv.if:
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP3]], [[D:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <32 x i32> poison, i32 [[TMP4]], i32 0
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]]
+; CHECK: pred.udiv.continue:
+; CHECK-NEXT: [[TMP6:%.*]] = phi <32 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2:%.*]]
+; CHECK: pred.udiv.if1:
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i32> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = udiv i32 [[TMP8]], [[D]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <32 x i32> [[TMP6]], i32 [[TMP9]], i32 1
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]]
+; CHECK: pred.udiv.continue2:
+; CHECK-NEXT: [[TMP11:%.*]] = phi <32 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF1]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
+; CHECK: pred.udiv.if3:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP1]], i32 2
+; CHECK-NEXT: [[TMP14:%.*]] = udiv i32 [[TMP13]], [[D]]
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <32 x i32> [[TMP11]], i32 [[TMP14]], i32 2
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE4]]
+; CHECK: pred.udiv.continue4:
+; CHECK-NEXT: [[TMP16:%.*]] = phi <32 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE2]] ], [ [[TMP15]], [[PRED_UDIV_IF3]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
+; CHECK: pred.udiv.if5:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i32> [[TMP1]], i32 3
+; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 [[TMP18]], [[D]]
+; CHECK-NEXT: [[TMP20:%.*]] = insertelement <32 x i32> [[TMP16]], i32 [[TMP19]], i32 3
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE6]]
+; CHECK: pred.udiv.continue6:
+; CHECK-NEXT: [[TMP21:%.*]] = phi <32 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP20]], [[PRED_UDIV_IF5]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i1> [[TMP0]], i32 4
+; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
+; CHECK: pred.udiv.if7:
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i32> [[TMP1]], i32 4
+; CHECK-NEXT: [[TMP24:%.*]] = udiv i32 [[TMP23]], [[D]]
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <32 x i32> [[TMP21]], i32 [[TMP24]], i32 4
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE8]]
+; CHECK: pred.udiv.continue8:
+; CHECK-NEXT: [[TMP26:%.*]] = phi <32 x i32> [ [[TMP21]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP25]], [[PRED_UDIV_IF7]] ]
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <32 x i1> [[TMP0]], i32 5
+; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
+; CHECK: pred.udiv.if9:
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP1]], i32 5
+; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], [[D]]
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <32 x i32> [[TMP26]], i32 [[TMP29]], i32 5
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE10]]
+; CHECK: pred.udiv.continue10:
+; CHECK-NEXT: [[TMP31:%.*]] = phi <32 x i32> [ [[TMP26]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP30]], [[PRED_UDIV_IF9]] ]
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i1> [[TMP0]], i32 6
+; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
+; CHECK: pred.udiv.if11:
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <32 x i32> [[TMP1]], i32 6
+; CHECK-NEXT: [[TMP34:%.*]] = udiv i32 [[TMP33]], [[D]]
+; CHECK-NEXT: [[TMP35:%.*]] = insertelement <32 x i32> [[TMP31]], i32 [[TMP34]], i32 6
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE12]]
+; CHECK: pred.udiv.continue12:
+; CHECK-NEXT: [[TMP36:%.*]] = phi <32 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP35]], [[PRED_UDIV_IF11]] ]
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i1> [[TMP0]], i32 7
+; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]]
+; CHECK: pred.udiv.if13:
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i32> [[TMP1]], i32 7
+; CHECK-NEXT: [[TMP39:%.*]] = udiv i32 [[TMP38]], [[D]]
+; CHECK-NEXT: [[TMP40:%.*]] = insertelement <32 x i32> [[TMP36]], i32 [[TMP39]], i32 7
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE14]]
+; CHECK: pred.udiv.continue14:
+; CHECK-NEXT: [[TMP41:%.*]] = phi <32 x i32> [ [[TMP36]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP40]], [[PRED_UDIV_IF13]] ]
+; CHECK-NEXT: [[TMP42:%.*]] = extractelement <32 x i1> [[TMP0]], i32 8
+; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16:%.*]]
+; CHECK: pred.udiv.if15:
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP1]], i32 8
+; CHECK-NEXT: [[TMP44:%.*]] = udiv i32 [[TMP43]], [[D]]
+; CHECK-NEXT: [[TMP45:%.*]] = insertelement <32 x i32> [[TMP41]], i32 [[TMP44]], i32 8
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE16]]
+; CHECK: pred.udiv.continue16:
+; CHECK-NEXT: [[TMP46:%.*]] = phi <32 x i32> [ [[TMP41]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP45]], [[PRED_UDIV_IF15]] ]
+; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i1> [[TMP0]], i32 9
+; CHECK-NEXT: br i1 [[TMP47]], label [[PRED_UDIV_IF17:%.*]], label [[PRED_UDIV_CONTINUE18:%.*]]
+; CHECK: pred.udiv.if17:
+; CHECK-NEXT: [[TMP48:%.*]] = extractelement <32 x i32> [[TMP1]], i32 9
+; CHECK-NEXT: [[TMP49:%.*]] = udiv i32 [[TMP48]], [[D]]
+; CHECK-NEXT: [[TMP50:%.*]] = insertelement <32 x i32> [[TMP46]], i32 [[TMP49]], i32 9
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE18]]
+; CHECK: pred.udiv.continue18:
+; CHECK-NEXT: [[TMP51:%.*]] = phi <32 x i32> [ [[TMP46]], [[PRED_UDIV_CONTINUE16]] ], [ [[TMP50]], [[PRED_UDIV_IF17]] ]
+; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i1> [[TMP0]], i32 10
+; CHECK-NEXT: br i1 [[TMP52]], label [[PRED_UDIV_IF19:%.*]], label [[PRED_UDIV_CONTINUE20:%.*]]
+; CHECK: pred.udiv.if19:
+; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i32> [[TMP1]], i32 10
+; CHECK-NEXT: [[TMP54:%.*]] = udiv i32 [[TMP53]], [[D]]
+; CHECK-NEXT: [[TMP55:%.*]] = insertelement <32 x i32> [[TMP51]], i32 [[TMP54]], i32 10
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE20]]
+; CHECK: pred.udiv.continue20:
+; CHECK-NEXT: [[TMP56:%.*]] = phi <32 x i32> [ [[TMP51]], [[PRED_UDIV_CONTINUE18]] ], [ [[TMP55]], [[PRED_UDIV_IF19]] ]
+; CHECK-NEXT: [[TMP57:%.*]] = extractelement <32 x i1> [[TMP0]], i32 11
+; CHECK-NEXT: br i1 [[TMP57]], label [[PRED_UDIV_IF21:%.*]], label [[PRED_UDIV_CONTINUE22:%.*]]
+; CHECK: pred.udiv.if21:
+; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP1]], i32 11
+; CHECK-NEXT: [[TMP59:%.*]] = udiv i32 [[TMP58]], [[D]]
+; CHECK-NEXT: [[TMP60:%.*]] = insertelement <32 x i32> [[TMP56]], i32 [[TMP59]], i32 11
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE22]]
+; CHECK: pred.udiv.continue22:
+; CHECK-NEXT: [[TMP61:%.*]] = phi <32 x i32> [ [[TMP56]], [[PRED_UDIV_CONTINUE20]] ], [ [[TMP60]], [[PRED_UDIV_IF21]] ]
+; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i1> [[TMP0]], i32 12
+; CHECK-NEXT: br i1 [[TMP62]], label [[PRED_UDIV_IF23:%.*]], label [[PRED_UDIV_CONTINUE24:%.*]]
+; CHECK: pred.udiv.if23:
+; CHECK-NEXT: [[TMP63:%.*]] = extractelement <32 x i32> [[TMP1]], i32 12
+; CHECK-NEXT: [[TMP64:%.*]] = udiv i32 [[TMP63]], [[D]]
+; CHECK-NEXT: [[TMP65:%.*]] = insertelement <32 x i32> [[TMP61]], i32 [[TMP64]], i32 12
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE24]]
+; CHECK: pred.udiv.continue24:
+; CHECK-NEXT: [[TMP66:%.*]] = phi <32 x i32> [ [[TMP61]], [[PRED_UDIV_CONTINUE22]] ], [ [[TMP65]], [[PRED_UDIV_IF23]] ]
+; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i1> [[TMP0]], i32 13
+; CHECK-NEXT: br i1 [[TMP67]], label [[PRED_UDIV_IF25:%.*]], label [[PRED_UDIV_CONTINUE26:%.*]]
+; CHECK: pred.udiv.if25:
+; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i32> [[TMP1]], i32 13
+; CHECK-NEXT: [[TMP69:%.*]] = udiv i32 [[TMP68]], [[D]]
+; CHECK-NEXT: [[TMP70:%.*]] = insertelement <32 x i32> [[TMP66]], i32 [[TMP69]], i32 13
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE26]]
+; CHECK: pred.udiv.continue26:
+; CHECK-NEXT: [[TMP71:%.*]] = phi <32 x i32> [ [[TMP66]], [[PRED_UDIV_CONTINUE24]] ], [ [[TMP70]], [[PRED_UDIV_IF25]] ]
+; CHECK-NEXT: [[TMP72:%.*]] = extractelement <32 x i1> [[TMP0]], i32 14
+; CHECK-NEXT: br i1 [[TMP72]], label [[PRED_UDIV_IF27:%.*]], label [[PRED_UDIV_CONTINUE28:%.*]]
+; CHECK: pred.udiv.if27:
+; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP1]], i32 14
+; CHECK-NEXT: [[TMP74:%.*]] = udiv i32 [[TMP73]], [[D]]
+; CHECK-NEXT: [[TMP75:%.*]] = insertelement <32 x i32> [[TMP71]], i32 [[TMP74]], i32 14
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE28]]
+; CHECK: pred.udiv.continue28:
+; CHECK-NEXT: [[TMP76:%.*]] = phi <32 x i32> [ [[TMP71]], [[PRED_UDIV_CONTINUE26]] ], [ [[TMP75]], [[PRED_UDIV_IF27]] ]
+; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i1> [[TMP0]], i32 15
+; CHECK-NEXT: br i1 [[TMP77]], label [[PRED_UDIV_IF29:%.*]], label [[PRED_UDIV_CONTINUE30:%.*]]
+; CHECK: pred.udiv.if29:
+; CHECK-NEXT: [[TMP78:%.*]] = extractelement <32 x i32> [[TMP1]], i32 15
+; CHECK-NEXT: [[TMP79:%.*]] = udiv i32 [[TMP78]], [[D]]
+; CHECK-NEXT: [[TMP80:%.*]] = insertelement <32 x i32> [[TMP76]], i32 [[TMP79]], i32 15
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE30]]
+; CHECK: pred.udiv.continue30:
+; CHECK-NEXT: [[TMP81:%.*]] = phi <32 x i32> [ [[TMP76]], [[PRED_UDIV_CONTINUE28]] ], [ [[TMP80]], [[PRED_UDIV_IF29]] ]
+; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i1> [[TMP0]], i32 16
+; CHECK-NEXT: br i1 [[TMP82]], label [[PRED_UDIV_IF31:%.*]], label [[PRED_UDIV_CONTINUE32:%.*]]
+; CHECK: pred.udiv.if31:
+; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i32> [[TMP1]], i32 16
+; CHECK-NEXT: [[TMP84:%.*]] = udiv i32 [[TMP83]], [[D]]
+; CHECK-NEXT: [[TMP85:%.*]] = insertelement <32 x i32> [[TMP81]], i32 [[TMP84]], i32 16
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE32]]
+; CHECK: pred.udiv.continue32:
+; CHECK-NEXT: [[TMP86:%.*]] = phi <32 x i32> [ [[TMP81]], [[PRED_UDIV_CONTINUE30]] ], [ [[TMP85]], [[PRED_UDIV_IF31]] ]
+; CHECK-NEXT: [[TMP87:%.*]] = extractelement <32 x i1> [[TMP0]], i32 17
+; CHECK-NEXT: br i1 [[TMP87]], label [[PRED_UDIV_IF33:%.*]], label [[PRED_UDIV_CONTINUE34:%.*]]
+; CHECK: pred.udiv.if33:
+; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP1]], i32 17
+; CHECK-NEXT: [[TMP89:%.*]] = udiv i32 [[TMP88]], [[D]]
+; CHECK-NEXT: [[TMP90:%.*]] = insertelement <32 x i32> [[TMP86]], i32 [[TMP89]], i32 17
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE34]]
+; CHECK: pred.udiv.continue34:
+; CHECK-NEXT: [[TMP91:%.*]] = phi <32 x i32> [ [[TMP86]], [[PRED_UDIV_CONTINUE32]] ], [ [[TMP90]], [[PRED_UDIV_IF33]] ]
+; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i1> [[TMP0]], i32 18
+; CHECK-NEXT: br i1 [[TMP92]], label [[PRED_UDIV_IF35:%.*]], label [[PRED_UDIV_CONTINUE36:%.*]]
+; CHECK: pred.udiv.if35:
+; CHECK-NEXT: [[TMP93:%.*]] = extractelement <32 x i32> [[TMP1]], i32 18
+; CHECK-NEXT: [[TMP94:%.*]] = udiv i32 [[TMP93]], [[D]]
+; CHECK-NEXT: [[TMP95:%.*]] = insertelement <32 x i32> [[TMP91]], i32 [[TMP94]], i32 18
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE36]]
+; CHECK: pred.udiv.continue36:
+; CHECK-NEXT: [[TMP96:%.*]] = phi <32 x i32> [ [[TMP91]], [[PRED_UDIV_CONTINUE34]] ], [ [[TMP95]], [[PRED_UDIV_IF35]] ]
+; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i1> [[TMP0]], i32 19
+; CHECK-NEXT: br i1 [[TMP97]], label [[PRED_UDIV_IF37:%.*]], label [[PRED_UDIV_CONTINUE38:%.*]]
+; CHECK: pred.udiv.if37:
+; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i32> [[TMP1]], i32 19
+; CHECK-NEXT: [[TMP99:%.*]] = udiv i32 [[TMP98]], [[D]]
+; CHECK-NEXT: [[TMP100:%.*]] = insertelement <32 x i32> [[TMP96]], i32 [[TMP99]], i32 19
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE38]]
+; CHECK: pred.udiv.continue38:
+; CHECK-NEXT: [[TMP101:%.*]] = phi <32 x i32> [ [[TMP96]], [[PRED_UDIV_CONTINUE36]] ], [ [[TMP100]], [[PRED_UDIV_IF37]] ]
+; CHECK-NEXT: [[TMP102:%.*]] = extractelement <32 x i1> [[TMP0]], i32 20
+; CHECK-NEXT: br i1 [[TMP102]], label [[PRED_UDIV_IF39:%.*]], label [[PRED_UDIV_CONTINUE40:%.*]]
+; CHECK: pred.udiv.if39:
+; CHECK-NEXT: [[TMP103:%.*]] = extractelement <32 x i32> [[TMP1]], i32 20
+; CHECK-NEXT: [[TMP104:%.*]] = udiv i32 [[TMP103]], [[D]]
+; CHECK-NEXT: [[TMP105:%.*]] = insertelement <32 x i32> [[TMP101]], i32 [[TMP104]], i32 20
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE40]]
+; CHECK: pred.udiv.continue40:
+; CHECK-NEXT: [[TMP106:%.*]] = phi <32 x i32> [ [[TMP101]], [[PRED_UDIV_CONTINUE38]] ], [ [[TMP105]], [[PRED_UDIV_IF39]] ]
+; CHECK-NEXT: [[TMP107:%.*]] = extractelement <32 x i1> [[TMP0]], i32 21
+; CHECK-NEXT: br i1 [[TMP107]], label [[PRED_UDIV_IF41:%.*]], label [[PRED_UDIV_CONTINUE42:%.*]]
+; CHECK: pred.udiv.if41:
+; CHECK-NEXT: [[TMP108:%.*]] = extractelement <32 x i32> [[TMP1]], i32 21
+; CHECK-NEXT: [[TMP109:%.*]] = udiv i32 [[TMP108]], [[D]]
+; CHECK-NEXT: [[TMP110:%.*]] = insertelement <32 x i32> [[TMP106]], i32 [[TMP109]], i32 21
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE42]]
+; CHECK: pred.udiv.continue42:
+; CHECK-NEXT: [[TMP111:%.*]] = phi <32 x i32> [ [[TMP106]], [[PRED_UDIV_CONTINUE40]] ], [ [[TMP110]], [[PRED_UDIV_IF41]] ]
+; CHECK-NEXT: [[TMP112:%.*]] = extractelement <32 x i1> [[TMP0]], i32 22
+; CHECK-NEXT: br i1 [[TMP112]], label [[PRED_UDIV_IF43:%.*]], label [[PRED_UDIV_CONTINUE44:%.*]]
+; CHECK: pred.udiv.if43:
+; CHECK-NEXT: [[TMP113:%.*]] = extractelement <32 x i32> [[TMP1]], i32 22
+; CHECK-NEXT: [[TMP114:%.*]] = udiv i32 [[TMP113]], [[D]]
+; CHECK-NEXT: [[TMP115:%.*]] = insertelement <32 x i32> [[TMP111]], i32 [[TMP114]], i32 22
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE44]]
+; CHECK: pred.udiv.continue44:
+; CHECK-NEXT: [[TMP116:%.*]] = phi <32 x i32> [ [[TMP111]], [[PRED_UDIV_CONTINUE42]] ], [ [[TMP115]], [[PRED_UDIV_IF43]] ]
+; CHECK-NEXT: [[TMP117:%.*]] = extractelement <32 x i1> [[TMP0]], i32 23
+; CHECK-NEXT: br i1 [[TMP117]], label [[PRED_UDIV_IF45:%.*]], label [[PRED_UDIV_CONTINUE46:%.*]]
+; CHECK: pred.udiv.if45:
+; CHECK-NEXT: [[TMP118:%.*]] = extractelement <32 x i32> [[TMP1]], i32 23
+; CHECK-NEXT: [[TMP119:%.*]] = udiv i32 [[TMP118]], [[D]]
+; CHECK-NEXT: [[TMP120:%.*]] = insertelement <32 x i32> [[TMP116]], i32 [[TMP119]], i32 23
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE46]]
+; CHECK: pred.udiv.continue46:
+; CHECK-NEXT: [[TMP121:%.*]] = phi <32 x i32> [ [[TMP116]], [[PRED_UDIV_CONTINUE44]] ], [ [[TMP120]], [[PRED_UDIV_IF45]] ]
+; CHECK-NEXT: [[TMP122:%.*]] = extractelement <32 x i1> [[TMP0]], i32 24
+; CHECK-NEXT: br i1 [[TMP122]], label [[PRED_UDIV_IF47:%.*]], label [[PRED_UDIV_CONTINUE48:%.*]]
+; CHECK: pred.udiv.if47:
+; CHECK-NEXT: [[TMP123:%.*]] = extractelement <32 x i32> [[TMP1]], i32 24
+; CHECK-NEXT: [[TMP124:%.*]] = udiv i32 [[TMP123]], [[D]]
+; CHECK-NEXT: [[TMP125:%.*]] = insertelement <32 x i32> [[TMP121]], i32 [[TMP124]], i32 24
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE48]]
+; CHECK: pred.udiv.continue48:
+; CHECK-NEXT: [[TMP126:%.*]] = phi <32 x i32> [ [[TMP121]], [[PRED_UDIV_CONTINUE46]] ], [ [[TMP125]], [[PRED_UDIV_IF47]] ]
+; CHECK-NEXT: [[TMP127:%.*]] = extractelement <32 x i1> [[TMP0]], i32 25
+; CHECK-NEXT: br i1 [[TMP127]], label [[PRED_UDIV_IF49:%.*]], label [[PRED_UDIV_CONTINUE50:%.*]]
+; CHECK: pred.udiv.if49:
+; CHECK-NEXT: [[TMP128:%.*]] = extractelement <32 x i32> [[TMP1]], i32 25
+; CHECK-NEXT: [[TMP129:%.*]] = udiv i32 [[TMP128]], [[D]]
+; CHECK-NEXT: [[TMP130:%.*]] = insertelement <32 x i32> [[TMP126]], i32 [[TMP129]], i32 25
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE50]]
+; CHECK: pred.udiv.continue50:
+; CHECK-NEXT: [[TMP131:%.*]] = phi <32 x i32> [ [[TMP126]], [[PRED_UDIV_CONTINUE48]] ], [ [[TMP130]], [[PRED_UDIV_IF49]] ]
+; CHECK-NEXT: [[TMP132:%.*]] = extractelement <32 x i1> [[TMP0]], i32 26
+; CHECK-NEXT: br i1 [[TMP132]], label [[PRED_UDIV_IF51:%.*]], label [[PRED_UDIV_CONTINUE52:%.*]]
+; CHECK: pred.udiv.if51:
+; CHECK-NEXT: [[TMP133:%.*]] = extractelement <32 x i32> [[TMP1]], i32 26
+; CHECK-NEXT: [[TMP134:%.*]] = udiv i32 [[TMP133]], [[D]]
+; CHECK-NEXT: [[TMP135:%.*]] = insertelement <32 x i32> [[TMP131]], i32 [[TMP134]], i32 26
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE52]]
+; CHECK: pred.udiv.continue52:
+; CHECK-NEXT: [[TMP136:%.*]] = phi <32 x i32> [ [[TMP131]], [[PRED_UDIV_CONTINUE50]] ], [ [[TMP135]], [[PRED_UDIV_IF51]] ]
+; CHECK-NEXT: [[TMP137:%.*]] = extractelement <32 x i1> [[TMP0]], i32 27
+; CHECK-NEXT: br i1 [[TMP137]], label [[PRED_UDIV_IF53:%.*]], label [[PRED_UDIV_CONTINUE54:%.*]]
+; CHECK: pred.udiv.if53:
+; CHECK-NEXT: [[TMP138:%.*]] = extractelement <32 x i32> [[TMP1]], i32 27
+; CHECK-NEXT: [[TMP139:%.*]] = udiv i32 [[TMP138]], [[D]]
+; CHECK-NEXT: [[TMP140:%.*]] = insertelement <32 x i32> [[TMP136]], i32 [[TMP139]], i32 27
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE54]]
+; CHECK: pred.udiv.continue54:
+; CHECK-NEXT: [[TMP141:%.*]] = phi <32 x i32> [ [[TMP136]], [[PRED_UDIV_CONTINUE52]] ], [ [[TMP140]], [[PRED_UDIV_IF53]] ]
+; CHECK-NEXT: [[TMP142:%.*]] = extractelement <32 x i1> [[TMP0]], i32 28
+; CHECK-NEXT: br i1 [[TMP142]], label [[PRED_UDIV_IF55:%.*]], label [[PRED_UDIV_CONTINUE56:%.*]]
+; CHECK: pred.udiv.if55:
+; CHECK-NEXT: [[TMP143:%.*]] = extractelement <32 x i32> [[TMP1]], i32 28
+; CHECK-NEXT: [[TMP144:%.*]] = udiv i32 [[TMP143]], [[D]]
+; CHECK-NEXT: [[TMP145:%.*]] = insertelement <32 x i32> [[TMP141]], i32 [[TMP144]], i32 28
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE56]]
+; CHECK: pred.udiv.continue56:
+; CHECK-NEXT: [[TMP146:%.*]] = phi <32 x i32> [ [[TMP141]], [[PRED_UDIV_CONTINUE54]] ], [ [[TMP145]], [[PRED_UDIV_IF55]] ]
+; CHECK-NEXT: [[TMP147:%.*]] = extractelement <32 x i1> [[TMP0]], i32 29
+; CHECK-NEXT: br i1 [[TMP147]], label [[PRED_UDIV_IF57:%.*]], label [[PRED_UDIV_CONTINUE58:%.*]]
+; CHECK: pred.udiv.if57:
+; CHECK-NEXT: [[TMP148:%.*]] = extractelement <32 x i32> [[TMP1]], i32 29
+; CHECK-NEXT: [[TMP149:%.*]] = udiv i32 [[TMP148]], [[D]]
+; CHECK-NEXT: [[TMP150:%.*]] = insertelement <32 x i32> [[TMP146]], i32 [[TMP149]], i32 29
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE58]]
+; CHECK: pred.udiv.continue58:
+; CHECK-NEXT: [[TMP151:%.*]] = phi <32 x i32> [ [[TMP146]], [[PRED_UDIV_CONTINUE56]] ], [ [[TMP150]], [[PRED_UDIV_IF57]] ]
+; CHECK-NEXT: [[TMP152:%.*]] = extractelement <32 x i1> [[TMP0]], i32 30
+; CHECK-NEXT: br i1 [[TMP152]], label [[PRED_UDIV_IF59:%.*]], label [[PRED_UDIV_CONTINUE60:%.*]]
+; CHECK: pred.udiv.if59:
+; CHECK-NEXT: [[TMP153:%.*]] = extractelement <32 x i32> [[TMP1]], i32 30
+; CHECK-NEXT: [[TMP154:%.*]] = udiv i32 [[TMP153]], [[D]]
+; CHECK-NEXT: [[TMP155:%.*]] = insertelement <32 x i32> [[TMP151]], i32 [[TMP154]], i32 30
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE60]]
+; CHECK: pred.udiv.continue60:
+; CHECK-NEXT: [[TMP156:%.*]] = phi <32 x i32> [ [[TMP151]], [[PRED_UDIV_CONTINUE58]] ], [ [[TMP155]], [[PRED_UDIV_IF59]] ]
+; CHECK-NEXT: [[TMP157:%.*]] = extractelement <32 x i1> [[TMP0]], i32 31
+; CHECK-NEXT: br i1 [[TMP157]], label [[PRED_UDIV_IF61:%.*]], label [[PRED_UDIV_CONTINUE62]]
+; CHECK: pred.udiv.if61:
+; CHECK-NEXT: [[TMP158:%.*]] = extractelement <32 x i32> [[TMP1]], i32 31
+; CHECK-NEXT: [[TMP159:%.*]] = udiv i32 [[TMP158]], [[D]]
+; CHECK-NEXT: [[TMP160:%.*]] = insertelement <32 x i32> [[TMP156]], i32 [[TMP159]], i32 31
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE62]]
+; CHECK: pred.udiv.continue62:
+; CHECK-NEXT: [[TMP161:%.*]] = phi <32 x i32> [ [[TMP156]], [[PRED_UDIV_CONTINUE60]] ], [ [[TMP160]], [[PRED_UDIV_IF61]] ]
+; CHECK-NEXT: [[TMP162:%.*]] = zext <32 x i32> [[TMP161]] to <32 x i64>
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <32 x i1> [[BROADCAST_SPLAT]], <32 x i64> zeroinitializer, <32 x i64> [[TMP162]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32)
+; CHECK-NEXT: [[TMP163:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT: br i1 [[TMP163]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP164:%.*]] = extractelement <32 x i64> [[PREDPHI]], i32 31
+; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK: vec.epilog.iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF13:![0-9]+]]
+; CHECK: vec.epilog.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT63:%.*]] = insertelement <8 x i1> poison, i1 [[C]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT64:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT63]], <8 x i1> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP165:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT64]], splat (i1 true)
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT65:%.*]] = insertelement <8 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT66:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT65]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[BROADCAST_SPLAT66]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK: vec.epilog.vector.body:
+; CHECK-NEXT: [[INDEX67:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT86:%.*]], [[PRED_UDIV_CONTINUE84:%.*]] ]
+; CHECK-NEXT: [[VEC_IND68:%.*]] = phi <8 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT87:%.*]], [[PRED_UDIV_CONTINUE84]] ]
+; CHECK-NEXT: [[TMP166:%.*]] = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> [[VEC_IND68]], <8 x i32> splat (i32 1))
+; CHECK-NEXT: [[TMP167:%.*]] = extractelement <8 x i1> [[TMP165]], i32 0
+; CHECK-NEXT: br i1 [[TMP167]], label [[PRED_UDIV_IF69:%.*]], label [[PRED_UDIV_CONTINUE70:%.*]]
+; CHECK: pred.udiv.if69:
+; CHECK-NEXT: [[TMP168:%.*]] = extractelement <8 x i32> [[TMP166]], i32 0
+; CHECK-NEXT: [[TMP169:%.*]] = udiv i32 [[TMP168]], [[D]]
+; CHECK-NEXT: [[TMP170:%.*]] = insertelement <8 x i32> poison, i32 [[TMP169]], i32 0
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE70]]
+; CHECK: pred.udiv.continue70:
+; CHECK-NEXT: [[TMP171:%.*]] = phi <8 x i32> [ poison, [[VEC_EPILOG_VECTOR_BODY]] ], [ [[TMP170]], [[PRED_UDIV_IF69]] ]
+; CHECK-NEXT: [[TMP172:%.*]] = extractelement <8 x i1> [[TMP165]], i32 1
+; CHECK-NEXT: br i1 [[TMP172]], label [[PRED_UDIV_IF71:%.*]], label [[PRED_UDIV_CONTINUE72:%.*]]
+; CHECK: pred.udiv.if71:
+; CHECK-NEXT: [[TMP173:%.*]] = extractelement <8 x i32> [[TMP166]], i32 1
+; CHECK-NEXT: [[TMP174:%.*]] = udiv i32 [[TMP173]], [[D]]
+; CHECK-NEXT: [[TMP175:%.*]] = insertelement <8 x i32> [[TMP171]], i32 [[TMP174]], i32 1
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE72]]
+; CHECK: pred.udiv.continue72:
+; CHECK-NEXT: [[TMP176:%.*]] = phi <8 x i32> [ [[TMP171]], [[PRED_UDIV_CONTINUE70]] ], [ [[TMP175]], [[PRED_UDIV_IF71]] ]
+; CHECK-NEXT: [[TMP177:%.*]] = extractelement <8 x i1> [[TMP165]], i32 2
+; CHECK-NEXT: br i1 [[TMP177]], label [[PRED_UDIV_IF73:%.*]], label [[PRED_UDIV_CONTINUE74:%.*]]
+; CHECK: pred.udiv.if73:
+; CHECK-NEXT: [[TMP178:%.*]] = extractelement <8 x i32> [[TMP166]], i32 2
+; CHECK-NEXT: [[TMP179:%.*]] = udiv i32 [[TMP178]], [[D]]
+; CHECK-NEXT: [[TMP180:%.*]] = insertelement <8 x i32> [[TMP176]], i32 [[TMP179]], i32 2
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE74]]
+; CHECK: pred.udiv.continue74:
+; CHECK-NEXT: [[TMP181:%.*]] = phi <8 x i32> [ [[TMP176]], [[PRED_UDIV_CONTINUE72]] ], [ [[TMP180]], [[PRED_UDIV_IF73]] ]
+; CHECK-NEXT: [[TMP182:%.*]] = extractelement <8 x i1> [[TMP165]], i32 3
+; CHECK-NEXT: br i1 [[TMP182]], label [[PRED_UDIV_IF75:%.*]], label [[PRED_UDIV_CONTINUE76:%.*]]
+; CHECK: pred.udiv.if75:
+; CHECK-NEXT: [[TMP183:%.*]] = extractelement <8 x i32> [[TMP166]], i32 3
+; CHECK-NEXT: [[TMP184:%.*]] = udiv i32 [[TMP183]], [[D]]
+; CHECK-NEXT: [[TMP185:%.*]] = insertelement <8 x i32> [[TMP181]], i32 [[TMP184]], i32 3
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE76]]
+; CHECK: pred.udiv.continue76:
+; CHECK-NEXT: [[TMP186:%.*]] = phi <8 x i32> [ [[TMP181]], [[PRED_UDIV_CONTINUE74]] ], [ [[TMP185]], [[PRED_UDIV_IF75]] ]
+; CHECK-NEXT: [[TMP187:%.*]] = extractelement <8 x i1> [[TMP165]], i32 4
+; CHECK-NEXT: br i1 [[TMP187]], label [[PRED_UDIV_IF77:%.*]], label [[PRED_UDIV_CONTINUE78:%.*]]
+; CHECK: pred.udiv.if77:
+; CHECK-NEXT: [[TMP188:%.*]] = extractelement <8 x i32> [[TMP166]], i32 4
+; CHECK-NEXT: [[TMP189:%.*]] = udiv i32 [[TMP188]], [[D]]
+; CHECK-NEXT: [[TMP190:%.*]] = insertelement <8 x i32> [[TMP186]], i32 [[TMP189]], i32 4
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE78]]
+; CHECK: pred.udiv.continue78:
+; CHECK-NEXT: [[TMP191:%.*]] = phi <8 x i32> [ [[TMP186]], [[PRED_UDIV_CONTINUE76]] ], [ [[TMP190]], [[PRED_UDIV_IF77]] ]
+; CHECK-NEXT: [[TMP192:%.*]] = extractelement <8 x i1> [[TMP165]], i32 5
+; CHECK-NEXT: br i1 [[TMP192]], label [[PRED_UDIV_IF79:%.*]], label [[PRED_UDIV_CONTINUE80:%.*]]
+; CHECK: pred.udiv.if79:
+; CHECK-NEXT: [[TMP193:%.*]] = extractelement <8 x i32> [[TMP166]], i32 5
+; CHECK-NEXT: [[TMP194:%.*]] = udiv i32 [[TMP193]], [[D]]
+; CHECK-NEXT: [[TMP195:%.*]] = insertelement <8 x i32> [[TMP191]], i32 [[TMP194]], i32 5
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE80]]
+; CHECK: pred.udiv.continue80:
+; CHECK-NEXT: [[TMP196:%.*]] = phi <8 x i32> [ [[TMP191]], [[PRED_UDIV_CONTINUE78]] ], [ [[TMP195]], [[PRED_UDIV_IF79]] ]
+; CHECK-NEXT: [[TMP197:%.*]] = extractelement <8 x i1> [[TMP165]], i32 6
+; CHECK-NEXT: br i1 [[TMP197]], label [[PRED_UDIV_IF81:%.*]], label [[PRED_UDIV_CONTINUE82:%.*]]
+; CHECK: pred.udiv.if81:
+; CHECK-NEXT: [[TMP198:%.*]] = extractelement <8 x i32> [[TMP166]], i32 6
+; CHECK-NEXT: [[TMP199:%.*]] = udiv i32 [[TMP198]], [[D]]
+; CHECK-NEXT: [[TMP200:%.*]] = insertelement <8 x i32> [[TMP196]], i32 [[TMP199]], i32 6
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE82]]
+; CHECK: pred.udiv.continue82:
+; CHECK-NEXT: [[TMP201:%.*]] = phi <8 x i32> [ [[TMP196]], [[PRED_UDIV_CONTINUE80]] ], [ [[TMP200]], [[PRED_UDIV_IF81]] ]
+; CHECK-NEXT: [[TMP202:%.*]] = extractelement <8 x i1> [[TMP165]], i32 7
+; CHECK-NEXT: br i1 [[TMP202]], label [[PRED_UDIV_IF83:%.*]], label [[PRED_UDIV_CONTINUE84]]
+; CHECK: pred.udiv.if83:
+; CHECK-NEXT: [[TMP203:%.*]] = extractelement <8 x i32> [[TMP166]], i32 7
+; CHECK-NEXT: [[TMP204:%.*]] = udiv i32 [[TMP203]], [[D]]
+; CHECK-NEXT: [[TMP205:%.*]] = insertelement <8 x i32> [[TMP201]], i32 [[TMP204]], i32 7
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE84]]
+; CHECK: pred.udiv.continue84:
+; CHECK-NEXT: [[TMP206:%.*]] = phi <8 x i32> [ [[TMP201]], [[PRED_UDIV_CONTINUE82]] ], [ [[TMP205]], [[PRED_UDIV_IF83]] ]
+; CHECK-NEXT: [[TMP207:%.*]] = zext <8 x i32> [[TMP206]] to <8 x i64>
+; CHECK-NEXT: [[PREDPHI85:%.*]] = select <8 x i1> [[BROADCAST_SPLAT64]], <8 x i64> zeroinitializer, <8 x i64> [[TMP207]]
+; CHECK-NEXT: [[INDEX_NEXT86]] = add nuw i32 [[INDEX67]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT87]] = add <8 x i32> [[VEC_IND68]], splat (i32 8)
+; CHECK-NEXT: [[TMP208:%.*]] = icmp eq i32 [[INDEX_NEXT86]], 1000
+; CHECK-NEXT: br i1 [[TMP208]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: vec.epilog.middle.block:
+; CHECK-NEXT: [[TMP209:%.*]] = extractelement <8 x i64> [[PREDPHI85]], i32 7
+; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK: vec.epilog.scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL88:%.*]] = phi i32 [ 1000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: loop.header:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL88]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @llvm.usub.sat.i32(i32 [[IV]], i32 1)
+; CHECK-NEXT: [[UDIV:%.*]] = udiv i32 [[CALL]], [[D]]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[UDIV]] to i64
+; CHECK-NEXT: br label [[LOOP_LATCH]]
+; CHECK: loop.latch:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ [[ZEXT]], [[THEN]] ], [ 0, [[LOOP_HEADER]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 1000
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[MERGE_LCSSA:%.*]] = phi i64 [ [[MERGE]], [[LOOP_LATCH]] ], [ [[TMP164]], [[MIDDLE_BLOCK]] ], [ [[TMP209]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i64 [[MERGE_LCSSA]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 %c, label %loop.latch, label %then
+
+then:
+ %call = tail call i32 @llvm.usub.sat.i32(i32 %iv, i32 1)
+ %udiv = udiv i32 %call, %d
+ %zext = zext i32 %udiv to i64
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i64 [ %zext, %then ], [ 0, %loop.header ]
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv, 1000
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret i64 %merge
+}
+
attributes #0 = { "target-cpu"="znver4" }
attributes #1 = { "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
attributes #2 = { "target-cpu"="znver3" }
More information about the llvm-commits mailing list