[llvm] [VPlan] Remove original loop if dead after vectorization. (PR #155497)

Tue Aug 26 14:00:23 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-powerpc

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Build on top of https://github.com/llvm/llvm-project/pull/154510 to
completely remove dead scalar loops.

Depends on https://github.com/llvm/llvm-project/pull/154510. (Included
in the PR)

---

Patch is 2.94 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155497.diff


337 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+51-20) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+7-5) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+1-1) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanUtils.h (+1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll (+4-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll (+6-20) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll (+4-32) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+34-30) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll (+1-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll (+8-10) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll (+1-31) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll (+3-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll (+6-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll (+1-14) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll (+24-30) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll (+43-57) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+23-31) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (+15-29) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll (+4-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave_count_for_known_tc.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll (+2-17) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll (+4-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll (+8-23) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+25-32) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll (+2-13) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll (-51) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll (+25-228) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll (+10-8) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll (+22-18) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll (+75-54) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+87-81) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll (+63-60) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll (+3-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll (+2-16) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+5-52) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+22-127) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll (+151-25) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll (+18-61) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+15-26) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll (+6-11) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll (+3-9) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+6-9) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+20-23) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll (+15-11) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+3-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll (+3-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll (+11-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll (+18-106) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+7-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll (+42-32) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll (+5-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (+5-221) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll (+5-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll (+4-40) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-constant-ops.ll (+16-16) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-metadata.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll (+12-84) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll (+3-37) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll (+56-56) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll (+36-36) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll (+3-38) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll (+6-8) 
- (modified) llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll (+6-16) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll (+18-61) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll (+3-4) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll (+33-75) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll (+34-152) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll (+19-171) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll (+1-2) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll (+18-24) 
- (modified) llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll (+1-13) 
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll (+3-33) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll (+33-208) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll (+34-77) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll (+4-27) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll (+44-302) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll (+4-26) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/f16.ll (+1-14) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll (+16-20) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll (+5-7) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll (+10-62) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll (+63-948) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll (+14-10) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll (+3-36) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll (+10-68) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll (+2-20) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll (+6-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll (+30-22) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll (+1-24) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll (+2-26) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll (+2-23) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll (+65-296) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll (+2-13) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+45-56) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll (+9-57) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll (+12-77) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll (+14-80) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll (+17-103) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+43-88) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll (+72-90) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll (+36-45) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll (+41-63) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll (+30-170) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll (+9-69) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll (+13-71) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll (+1-14) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll (+49-227) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll (+16-128) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll (+8-12) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll (+1-12) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll (+6-42) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll (+2-20) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll (+2-14) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll (+49-227) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll (+9-58) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll (+19-104) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll (+1-11) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll (+9-73) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll (+1-16) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll (+4-5) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+62-232) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll (+8-29) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll (+1-15) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll (+4-5) 
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll (+3-11) 
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll (+1-11) 
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll (+1-22) 
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll (+1-13) 
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll (+1-25) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll (+11-12) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll (+5-53) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll (+7-26) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+15-51) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll (+8-10) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll (+3-24) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll (+15-15) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+10-26) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll (+24-30) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll (+8-10) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll (+6-46) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+41-71) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll (+28-83) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll (+5-7) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll (+8-12) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll (+4-6) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaving.ll (+3-15) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll (+6-8) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+81-317) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll (+8-12) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll (+24-31) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll (+4-28) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/optsize.ll (+14-102) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll (+7-11) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll (+5-7) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll (+10-14) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll (+2-17) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr34438.ll (+4-17) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr36524.ll (+4-6) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll (+2-20) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr81872.ll (+6-27) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll (+12-48) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll (+8-10) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll (+1-19) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll (+1-15) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/small-size.ll (+22-46) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll (+2-36) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll (+6-50) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll (+33-61) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll (+3-29) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll (+1-15) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll (+1-16) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll (+3-4) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll (+6-44) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll (+5-26) 
- (modified) llvm/test/Transforms/LoopVectorize/assume.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/blend-in-header.ll (+12-16) 
- (modified) llvm/test/Transforms/LoopVectorize/bsd_regex.ll (+1-5) 
- (modified) llvm/test/Transforms/LoopVectorize/check-prof-info.ll (+14-22) 
- (modified) llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll (+1-14) 
- (modified) llvm/test/Transforms/LoopVectorize/constantfolder.ll (+13-118) 
- (modified) llvm/test/Transforms/LoopVectorize/create-induction-resume.ll (+1-13) 
- (modified) llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll (+4-5) 
- (modified) llvm/test/Transforms/LoopVectorize/dead_instructions.ll (+18-31) 
- (modified) llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll (+1-18) 
- (modified) llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll (+41-365) 
- (modified) llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll (+1-10) 
- (modified) llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll (+1-10) 
- (modified) llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll (+4-6) 
- (modified) llvm/test/Transforms/LoopVectorize/extract-from-end-vector-constant.ll (+8-10) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (+37-58) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll (+22-46) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll (+2-16) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll (+15-36) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+96-270) 
- (modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+3-15) 
- (modified) llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll (+2-15) 
- (modified) llvm/test/Transforms/LoopVectorize/forked-pointers.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/hints-trans.ll (+20-2) 
- (modified) llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll (+9-12) 
- (modified) llvm/test/Transforms/LoopVectorize/if-pred-stores.ll (+15-69) 
- (modified) llvm/test/Transforms/LoopVectorize/if-reduction.ll (+4-6) 
- (modified) llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll (+1-11) 
- (modified) llvm/test/Transforms/LoopVectorize/induction-step.ll (+9-22) 
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+181-357) 
- (modified) llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll (+14-71) 
- (modified) llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll (+1-15) 
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll (+9-27) 
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll (+17-17) 
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll (+22-40) 
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll (+41-80) 
- (modified) llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll (+6-7) 
- (modified) llvm/test/Transforms/LoopVectorize/is_fpclass.ll (+1-13) 
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll (+33-189) 
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+24-150) 


``````````diff

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 98554310c74df..951e3fcc6e60c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2357,9 +2357,9 @@ EpilogueVectorizerMainLoop::createIterationCountCheck(ElementCount VF,
 /// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
 /// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
 /// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
-static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPBasicBlock *VPBB,
+static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPlan &Plan, VPBasicBlock *VPBB,
                                              BasicBlock *IRBB) {
-  VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
+  VPIRBasicBlock *IRVPBB = Plan.createVPIRBasicBlock(IRBB);
   auto IP = IRVPBB->begin();
   for (auto &R : make_early_inc_range(VPBB->phis()))
     R.moveBefore(*IRVPBB, IP);
@@ -2571,6 +2571,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
   // Remove redundant induction instructions.
   cse(HeaderBB);
 
+  if (Plan.getScalarPreheader()->getNumPredecessors() == 0)
+    return;
+
   // Set/update profile weights for the vector and remainder loops as original
   // loop iterations are now distributed among them. Note that original loop
   // becomes the scalar remainder loop after vectorization.
@@ -7226,6 +7229,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
   VPlanTransforms::simplifyRecipes(BestVPlan);
   VPlanTransforms::removeBranchOnConst(BestVPlan);
+  if (BestVPlan.getEntry()->getSingleSuccessor() ==
+      BestVPlan.getScalarPreheader()) {
+    // TODO: Should not even try to vectorize.
+    return DenseMap<const SCEV *, Value *>();
+  }
+
   VPlanTransforms::narrowInterleaveGroups(
       BestVPlan, BestVF,
       TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -7268,7 +7277,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   BasicBlock *EntryBB =
       cast<VPIRBasicBlock>(BestVPlan.getEntry())->getIRBasicBlock();
   State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
-  replaceVPBBWithIRVPBB(BestVPlan.getScalarPreheader(),
+  replaceVPBBWithIRVPBB(BestVPlan, BestVPlan.getScalarPreheader(),
                         State.CFG.PrevBB->getSingleSuccessor());
   VPlanTransforms::removeDeadRecipes(BestVPlan);
 
@@ -7351,8 +7360,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
     } else {
       // Keep all loop hints from the original loop on the vector loop (we'll
       // replace the vectorizer-specific hints below).
-      if (MDNode *LID = OrigLoop->getLoopID())
-        L->setLoopID(LID);
+      if (BestVPlan.getScalarPreheader()->getNumPredecessors() > 0)
+        if (MDNode *LID = OrigLoop->getLoopID())
+          L->setLoopID(LID);
 
       LoopVectorizeHints Hints(L, true, *ORE);
       Hints.setAlreadyVectorized();
@@ -7383,6 +7393,18 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
       addRuntimeUnrollDisableMetaData(L);
   }
 
+  if (BestVPlan.getScalarPreheader()->getNumPredecessors() == 0) {
+    // If the original loop became unreachable, we need to delete it.
+    auto Blocks = OrigLoop->getBlocksVector();
+    Blocks.push_back(cast<VPIRBasicBlock>(BestVPlan.getScalarPreheader())
+                         ->getIRBasicBlock());
+    for (auto *BB : Blocks)
+      LI->removeBlock(BB);
+    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+    DeleteDeadBlocks(Blocks, &DTU);
+    LI->erase(OrigLoop);
+  }
+
   // 3. Fix the vectorized code: take care of header phi's, live-outs,
   //    predication, updating analyses.
   ILV.fixVectorizedLoop(State);
@@ -7460,7 +7482,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
     // generated here dominates the vector epilog iter check.
     EPI.TripCount = Count;
   } else {
-    VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
+    VectorPHVPBB =
+        replaceVPBBWithIRVPBB(Plan, VectorPHVPBB, LoopVectorPreHeader);
   }
 
   BranchInst &BI =
@@ -7493,7 +7516,7 @@ BasicBlock *EpilogueVectorizerEpilogueLoop::createVectorizedLoopSkeleton() {
   BasicBlock *VecEpilogueIterationCountCheck =
       SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
                  nullptr, "vec.epilog.iter.check", true);
-  VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
+  VectorPHVPBB = replaceVPBBWithIRVPBB(Plan, VectorPHVPBB, LoopVectorPreHeader);
 
   emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
                                           VecEpilogueIterationCountCheck);
@@ -10213,11 +10236,22 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
   }
 
+  if (ORE->allowExtraAnalysis(LV_NAME))
+    checkMixedPrecision(L, ORE);
+
   bool DisableRuntimeUnroll = false;
   MDNode *OrigLoopID = L->getLoopID();
+  bool LoopRemoved = false;
   {
     using namespace ore;
     if (!VectorizeLoop) {
+      ORE->emit([&]() {
+        return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
+                                  L->getHeader())
+               << "interleaved loop (interleaved count: "
+               << NV("InterleaveCount", IC) << ")";
+      });
+
       assert(IC > 1 && "interleave count should not be 1 or 0");
       // If we decided that it is not legal to vectorize the loop, then
       // interleave it.
@@ -10234,14 +10268,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
       LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
                                    VF.MinProfitableTripCount);
       LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
-
-      ORE->emit([&]() {
-        return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
-                                  L->getHeader())
-               << "interleaved loop (interleaved count: "
-               << NV("InterleaveCount", IC) << ")";
-      });
+      LoopRemoved = BestPlan.getScalarPreheader()->getNumPredecessors() == 0;
     } else {
+      // Report the vectorization decision.
+      reportVectorization(ORE, L, VF, IC);
+
       // If we decided that it is *legal* to vectorize the loop, then do it.
 
       VPlan &BestPlan = LVP.getPlanFor(VF.Width);
@@ -10311,23 +10342,23 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         // rarely used is not worth unrolling.
         if (!Checks.hasChecks())
           DisableRuntimeUnroll = true;
+        LoopRemoved = BestPlan.getScalarPreheader()->getNumPredecessors() == 0;
       }
-      // Report the vectorization decision.
-      reportVectorization(ORE, L, VF, IC);
     }
-
-    if (ORE->allowExtraAnalysis(LV_NAME))
-      checkMixedPrecision(L, ORE);
   }
 
   assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
          "DT not preserved correctly");
 
+  if (LoopRemoved)
+    return true;
+
   std::optional<MDNode *> RemainderLoopID =
       makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
                                       LLVMLoopVectorizeFollowupEpilogue});
   if (RemainderLoopID) {
-    L->setLoopID(*RemainderLoopID);
+    if (!LoopRemoved)
+      L->setLoopID(*RemainderLoopID);
   } else {
     if (DisableRuntimeUnroll)
       addRuntimeUnrollDisableMetaData(L);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1438dc366b55d..4a7618f40164b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -972,12 +972,14 @@ void VPlan::execute(VPTransformState *State) {
   setName("Final VPlan");
   LLVM_DEBUG(dump());
 
-  // Disconnect scalar preheader and scalar header, as the dominator tree edge
-  // will be updated as part of VPlan execution. This allows keeping the DTU
-  // logic generic during VPlan execution.
   BasicBlock *ScalarPh = State->CFG.ExitBB;
-  State->CFG.DTU.applyUpdates(
-      {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
+  if (getScalarPreheader()->getNumPredecessors() > 0) {
+    // Disconnect scalar preheader and scalar header, as the dominator tree edge
+    // will be updated as part of VPlan execution. This allows keeping the DTU
+    // logic generic during VPlan execution.
+    State->CFG.DTU.applyUpdates(
+        {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
+  }
 
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       Entry);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d32d2a9ad11f7..8e7fc24080c31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1920,7 +1920,7 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_shallow(Plan.getEntry()))) {
     VPValue *Cond;
-    if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
+    if (VPBB->getNumSuccessors() != 2 || VPBB->empty() ||
         !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
       continue;
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 9e1d325a4d8d6..2959e9440e753 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -49,6 +49,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
     case Instruction::GetElementPtr:
     case Instruction::ICmp:
     case Instruction::FCmp:
+    case Instruction::Select:
     case VPInstruction::Broadcast:
     case VPInstruction::PtrAdd:
       return true;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
index c18f9f2fae06b..ddfdb257ed49a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
@@ -52,8 +52,8 @@ define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) {
 ; CHECK-NEXT:    [[TMP17]] = add <2 x i64> [[VEC_PHI]], [[PREDPHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
-; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       for.end:
 ; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP17]])
 ; CHECK-NEXT:    ret i64 [[TMP19]]
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index e44ddbce34fd5..58965c19ae1cc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -202,8 +202,8 @@ exit:
 define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i1 %c.0) {
 ; CHECK-LABEL: define void @test_blend_feeding_replicated_store_2(
 ; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i1 [[C_0:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
@@ -366,12 +366,11 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
 ; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
 ; CHECK-NEXT:    br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br label %[[SCALAR_PH]]
+; CHECK-NEXT:    br label %[[SCALAR_PH:.*]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
-; CHECK-NEXT:    [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[IV1:%.*]] = phi i32 [ 96, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
 ; CHECK-NEXT:    [[GEP_SRC1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV1]]
 ; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[GEP_SRC1]], align 1
 ; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
index f099c22333c3e..23918427e7003 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
@@ -6,8 +6,8 @@ target triple = "arm64-apple-macosx11.0.0"
 define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
 ; CHECK-LABEL: define void @fshl_operand_first_order_recurrence(
 ; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[SRC:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
@@ -30,14 +30,12 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
 ; CHECK-NEXT:    br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
-; CHECK-NEXT:    br label %[[SCALAR_PH]]
+; CHECK-NEXT:    br label %[[SCALAR_PH:.*]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 100, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RECUR:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT:    [[L]] = load i64, ptr [[GEP_SRC]], align 8
 ; CHECK-NEXT:    [[OR:%.*]] = tail call i64 @llvm.fshl.i64(i64 1, i64 [[RECUR]], i64 1)
@@ -73,7 +71,7 @@ define void @powi_call(ptr %P) {
 ; CHECK-LABEL: define void @powi_call(
 ; CHECK-SAME: ptr [[P:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
@@ -83,17 +81,6 @@ define void @powi_call(ptr %P) {
 ; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
-; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[IV]]
-; CHECK-NEXT:    [[L:%.*]] = load double, ptr [[GEP]], align 8
-; CHECK-NEXT:    [[POWI:%.*]] = tail call double @llvm.powi.f64.i32(double [[L]], i32 3)
-; CHECK-NEXT:    store double [[POWI]], ptr [[GEP]], align 8
-; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -224,5 +211,4 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
 ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
 ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 626242667e203..481be08e7e5ae 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -5,7 +5,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
 ; CHECK-LABEL: define void @clamped_tc_8(
 ; CHECK-SAME: ptr captures(none) [[DST:%.*]], i32 [[N:%.*]], i64 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
@@ -32,20 +32,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[DST]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP19:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP19]]
-; CHECK-NEXT:    [[CONV4:%.*]] = trunc i64 [[SHR3]] to i8
-; CHECK-NEXT:    store i8 [[CONV4]], ptr [[P_OUT_TAIL_09]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ;
@@ -79,7 +66,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
 ; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[REM]], 7
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], 3
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
@@ -104,22 +91,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/155497