[llvm] [VPlan] Remove original loop if dead after vectorization. (PR #155497)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 26 14:00:23 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Builds on top of https://github.com/llvm/llvm-project/pull/154510 to
completely remove scalar loops that are dead after vectorization.
Depends on https://github.com/llvm/llvm-project/pull/154510 (its changes
are included in this PR).
---
Patch is 2.94 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155497.diff
337 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+51-20)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+7-5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+1-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUtils.h (+1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll (+4-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll (+6-20)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll (+4-32)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+34-30)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll (+1-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll (+8-10)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll (+1-31)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll (+6-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll (+1-14)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll (+24-30)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll (+43-57)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+23-31)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (+15-29)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll (+4-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave_count_for_known_tc.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll (+2-17)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll (+4-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll (+8-23)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+25-32)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll (+2-13)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll (-51)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll (+25-228)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll (+10-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll (+22-18)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll (+75-54)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+87-81)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll (+63-60)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll (+2-16)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+5-52)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+22-127)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll (+151-25)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll (+18-61)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+15-26)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll (+6-11)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll (+3-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+6-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+20-23)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll (+15-11)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll (+11-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll (+18-106)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+7-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll (+42-32)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll (+5-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (+5-221)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll (+4-40)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-constant-ops.ll (+16-16)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-metadata.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll (+12-84)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll (+3-37)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll (+56-56)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll (+36-36)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll (+3-38)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll (+6-8)
- (modified) llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll (+6-16)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll (+18-61)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll (+33-75)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll (+34-152)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll (+19-171)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll (+18-24)
- (modified) llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll (+1-13)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll (+3-33)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll (+33-208)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll (+34-77)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll (+4-27)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll (+44-302)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll (+4-26)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/f16.ll (+1-14)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll (+16-20)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll (+5-7)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll (+10-62)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll (+63-948)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll (+14-10)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll (+3-36)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll (+10-68)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll (+2-20)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll (+6-8)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll (+30-22)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll (+1-24)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll (+2-26)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll (+2-23)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll (+65-296)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll (+2-13)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+45-56)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll (+9-57)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll (+12-77)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll (+14-80)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll (+17-103)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+43-88)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll (+72-90)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll (+36-45)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll (+41-63)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll (+30-170)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll (+9-69)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll (+13-71)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll (+1-14)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll (+49-227)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll (+16-128)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll (+8-12)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll (+1-12)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll (+6-42)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll (+2-20)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll (+2-14)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll (+49-227)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll (+9-58)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll (+19-104)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll (+1-11)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll (+9-73)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll (+1-16)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll (+4-5)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+62-232)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll (+8-29)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll (+1-15)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll (+4-5)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll (+3-11)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll (+1-11)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll (+1-22)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll (+1-13)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll (+1-25)
- (modified) llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll (+11-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll (+5-53)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll (+7-26)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+15-51)
- (modified) llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll (+8-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll (+3-24)
- (modified) llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll (+15-15)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+10-26)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll (+24-30)
- (modified) llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll (+8-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll (+6-46)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+41-71)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll (+28-83)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll (+5-7)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll (+8-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll (+4-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaving.ll (+3-15)
- (modified) llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll (+6-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+81-317)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll (+8-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll (+24-31)
- (modified) llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll (+4-28)
- (modified) llvm/test/Transforms/LoopVectorize/X86/optsize.ll (+14-102)
- (modified) llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll (+7-11)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll (+5-7)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll (+10-14)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll (+2-17)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr34438.ll (+4-17)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr36524.ll (+4-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll (+2-20)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr81872.ll (+6-27)
- (modified) llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll (+12-48)
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll (+8-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll (+1-19)
- (modified) llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll (+1-15)
- (modified) llvm/test/Transforms/LoopVectorize/X86/small-size.ll (+22-46)
- (modified) llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll (+2-36)
- (modified) llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll (+6-50)
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll (+33-61)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll (+3-29)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll (+1-15)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll (+1-16)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll (+6-44)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll (+5-26)
- (modified) llvm/test/Transforms/LoopVectorize/assume.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/blend-in-header.ll (+12-16)
- (modified) llvm/test/Transforms/LoopVectorize/bsd_regex.ll (+1-5)
- (modified) llvm/test/Transforms/LoopVectorize/check-prof-info.ll (+14-22)
- (modified) llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll (+1-14)
- (modified) llvm/test/Transforms/LoopVectorize/constantfolder.ll (+13-118)
- (modified) llvm/test/Transforms/LoopVectorize/create-induction-resume.ll (+1-13)
- (modified) llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll (+4-5)
- (modified) llvm/test/Transforms/LoopVectorize/dead_instructions.ll (+18-31)
- (modified) llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll (+1-18)
- (modified) llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll (+41-365)
- (modified) llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll (+1-10)
- (modified) llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll (+1-10)
- (modified) llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll (+4-6)
- (modified) llvm/test/Transforms/LoopVectorize/extract-from-end-vector-constant.ll (+8-10)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (+37-58)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll (+22-46)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll (+2-16)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll (+15-36)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+96-270)
- (modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+3-15)
- (modified) llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll (+2-15)
- (modified) llvm/test/Transforms/LoopVectorize/forked-pointers.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/hints-trans.ll (+20-2)
- (modified) llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll (+9-12)
- (modified) llvm/test/Transforms/LoopVectorize/if-pred-stores.ll (+15-69)
- (modified) llvm/test/Transforms/LoopVectorize/if-reduction.ll (+4-6)
- (modified) llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll (+1-11)
- (modified) llvm/test/Transforms/LoopVectorize/induction-step.ll (+9-22)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+181-357)
- (modified) llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll (+14-71)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll (+1-15)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll (+9-27)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll (+17-17)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll (+22-40)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll (+41-80)
- (modified) llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll (+6-7)
- (modified) llvm/test/Transforms/LoopVectorize/is_fpclass.ll (+1-13)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll (+33-189)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+24-150)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 98554310c74df..951e3fcc6e60c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2357,9 +2357,9 @@ EpilogueVectorizerMainLoop::createIterationCountCheck(ElementCount VF,
/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
/// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
-static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPBasicBlock *VPBB,
+static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPlan &Plan, VPBasicBlock *VPBB,
BasicBlock *IRBB) {
- VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
+ VPIRBasicBlock *IRVPBB = Plan.createVPIRBasicBlock(IRBB);
auto IP = IRVPBB->begin();
for (auto &R : make_early_inc_range(VPBB->phis()))
R.moveBefore(*IRVPBB, IP);
@@ -2571,6 +2571,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Remove redundant induction instructions.
cse(HeaderBB);
+ if (Plan.getScalarPreheader()->getNumPredecessors() == 0)
+ return;
+
// Set/update profile weights for the vector and remainder loops as original
// loop iterations are now distributed among them. Note that original loop
// becomes the scalar remainder loop after vectorization.
@@ -7226,6 +7229,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan);
VPlanTransforms::removeBranchOnConst(BestVPlan);
+ if (BestVPlan.getEntry()->getSingleSuccessor() ==
+ BestVPlan.getScalarPreheader()) {
+ // TODO: Should not even try to vectorize.
+ return DenseMap<const SCEV *, Value *>();
+ }
+
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -7268,7 +7277,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BasicBlock *EntryBB =
cast<VPIRBasicBlock>(BestVPlan.getEntry())->getIRBasicBlock();
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
- replaceVPBBWithIRVPBB(BestVPlan.getScalarPreheader(),
+ replaceVPBBWithIRVPBB(BestVPlan, BestVPlan.getScalarPreheader(),
State.CFG.PrevBB->getSingleSuccessor());
VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -7351,8 +7360,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
} else {
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
- if (MDNode *LID = OrigLoop->getLoopID())
- L->setLoopID(LID);
+ if (BestVPlan.getScalarPreheader()->getNumPredecessors() > 0)
+ if (MDNode *LID = OrigLoop->getLoopID())
+ L->setLoopID(LID);
LoopVectorizeHints Hints(L, true, *ORE);
Hints.setAlreadyVectorized();
@@ -7383,6 +7393,18 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
addRuntimeUnrollDisableMetaData(L);
}
+ if (BestVPlan.getScalarPreheader()->getNumPredecessors() == 0) {
+ // If the original loop became unreachable, we need to delete it.
+ auto Blocks = OrigLoop->getBlocksVector();
+ Blocks.push_back(cast<VPIRBasicBlock>(BestVPlan.getScalarPreheader())
+ ->getIRBasicBlock());
+ for (auto *BB : Blocks)
+ LI->removeBlock(BB);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ DeleteDeadBlocks(Blocks, &DTU);
+ LI->erase(OrigLoop);
+ }
+
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
ILV.fixVectorizedLoop(State);
@@ -7460,7 +7482,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
// generated here dominates the vector epilog iter check.
EPI.TripCount = Count;
} else {
- VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
+ VectorPHVPBB =
+ replaceVPBBWithIRVPBB(Plan, VectorPHVPBB, LoopVectorPreHeader);
}
BranchInst &BI =
@@ -7493,7 +7516,7 @@ BasicBlock *EpilogueVectorizerEpilogueLoop::createVectorizedLoopSkeleton() {
BasicBlock *VecEpilogueIterationCountCheck =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
nullptr, "vec.epilog.iter.check", true);
- VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
+ VectorPHVPBB = replaceVPBBWithIRVPBB(Plan, VectorPHVPBB, LoopVectorPreHeader);
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
VecEpilogueIterationCountCheck);
@@ -10213,11 +10236,22 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
}
+ if (ORE->allowExtraAnalysis(LV_NAME))
+ checkMixedPrecision(L, ORE);
+
bool DisableRuntimeUnroll = false;
MDNode *OrigLoopID = L->getLoopID();
+ bool LoopRemoved = false;
{
using namespace ore;
if (!VectorizeLoop) {
+ ORE->emit([&]() {
+ return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
+ L->getHeader())
+ << "interleaved loop (interleaved count: "
+ << NV("InterleaveCount", IC) << ")";
+ });
+
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
@@ -10234,14 +10268,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
VF.MinProfitableTripCount);
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
-
- ORE->emit([&]() {
- return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
- L->getHeader())
- << "interleaved loop (interleaved count: "
- << NV("InterleaveCount", IC) << ")";
- });
+ LoopRemoved = BestPlan.getScalarPreheader()->getNumPredecessors() == 0;
} else {
+ // Report the vectorization decision.
+ reportVectorization(ORE, L, VF, IC);
+
// If we decided that it is *legal* to vectorize the loop, then do it.
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
@@ -10311,23 +10342,23 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// rarely used is not worth unrolling.
if (!Checks.hasChecks())
DisableRuntimeUnroll = true;
+ LoopRemoved = BestPlan.getScalarPreheader()->getNumPredecessors() == 0;
}
- // Report the vectorization decision.
- reportVectorization(ORE, L, VF, IC);
}
-
- if (ORE->allowExtraAnalysis(LV_NAME))
- checkMixedPrecision(L, ORE);
}
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
"DT not preserved correctly");
+ if (LoopRemoved)
+ return true;
+
std::optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
if (RemainderLoopID) {
- L->setLoopID(*RemainderLoopID);
+ if (!LoopRemoved)
+ L->setLoopID(*RemainderLoopID);
} else {
if (DisableRuntimeUnroll)
addRuntimeUnrollDisableMetaData(L);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1438dc366b55d..4a7618f40164b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -972,12 +972,14 @@ void VPlan::execute(VPTransformState *State) {
setName("Final VPlan");
LLVM_DEBUG(dump());
- // Disconnect scalar preheader and scalar header, as the dominator tree edge
- // will be updated as part of VPlan execution. This allows keeping the DTU
- // logic generic during VPlan execution.
BasicBlock *ScalarPh = State->CFG.ExitBB;
- State->CFG.DTU.applyUpdates(
- {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
+ if (getScalarPreheader()->getNumPredecessors() > 0) {
+ // Disconnect scalar preheader and scalar header, as the dominator tree edge
+ // will be updated as part of VPlan execution. This allows keeping the DTU
+ // logic generic during VPlan execution.
+ State->CFG.DTU.applyUpdates(
+ {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
+ }
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
Entry);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d32d2a9ad11f7..8e7fc24080c31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1920,7 +1920,7 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
VPValue *Cond;
- if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
+ if (VPBB->getNumSuccessors() != 2 || VPBB->empty() ||
!match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 9e1d325a4d8d6..2959e9440e753 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -49,6 +49,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
case Instruction::GetElementPtr:
case Instruction::ICmp:
case Instruction::FCmp:
+ case Instruction::Select:
case VPInstruction::Broadcast:
case VPInstruction::PtrAdd:
return true;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
index c18f9f2fae06b..ddfdb257ed49a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
@@ -52,8 +52,8 @@ define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) {
; CHECK-NEXT: [[TMP17]] = add <2 x i64> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: middle.block:
+; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: for.end:
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP17]])
; CHECK-NEXT: ret i64 [[TMP19]]
;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index e44ddbce34fd5..58965c19ae1cc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -202,8 +202,8 @@ exit:
define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i1 %c.0) {
; CHECK-LABEL: define void @test_blend_feeding_replicated_store_2(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i1 [[C_0:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
@@ -366,12 +366,11 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 96, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV1]]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC1]], align 1
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
index f099c22333c3e..23918427e7003 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
@@ -6,8 +6,8 @@ target triple = "arm64-apple-macosx11.0.0"
define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
; CHECK-LABEL: define void @fshl_operand_first_order_recurrence(
; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[SRC:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -30,14 +30,12 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
-; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[L]] = load i64, ptr [[GEP_SRC]], align 8
; CHECK-NEXT: [[OR:%.*]] = tail call i64 @llvm.fshl.i64(i64 1, i64 [[RECUR]], i64 1)
@@ -73,7 +71,7 @@ define void @powi_call(ptr %P) {
; CHECK-LABEL: define void @powi_call(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -83,17 +81,6 @@ define void @powi_call(ptr %P) {
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[IV]]
-; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP]], align 8
-; CHECK-NEXT: [[POWI:%.*]] = tail call double @llvm.powi.f64.i32(double [[L]], i32 3)
-; CHECK-NEXT: store double [[POWI]], ptr [[GEP]], align 8
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -224,5 +211,4 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 626242667e203..481be08e7e5ae 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -5,7 +5,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-LABEL: define void @clamped_tc_8(
; CHECK-SAME: ptr captures(none) [[DST:%.*]], i32 [[N:%.*]], i64 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
@@ -32,20 +32,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
-; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[DST]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP19]]
-; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[SHR3]] to i8
-; CHECK-NEXT: store i8 [[CONV4]], ptr [[P_OUT_TAIL_09]], align 1
-; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
@@ -79,7 +66,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[REM]], 7
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 3
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
@@ -104,22 +91,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155497
More information about the llvm-commits
mailing list