[llvm] [LV] Change VPVectorPointerRecipe to emit byte GEP instead of typed GEP (PR #174934)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 8 01:31:01 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Mel Chen (Mel-Chen)
<details>
<summary>Changes</summary>
Currently, VPVectorPointerRecipe has the form: `vector-pointer %ptr, (%offset)`.
This makes it difficult to add a stride operand to VPVectorPointerRecipe for #<!-- -->147297.
One approach to solve this is to convert VPVectorPointerRecipe into an abstract recipe with the form `vector-pointer %ptr, %stride_in_bytes`, and revert to handling the unroll part via the VPUnrollPartAccessor approach. The conversion of each VPVectorPointerRecipe to Mul + PtrAdd would only happen during VPlanTransforms::convertToConcreteRecipes. As a first step, this patch changes the emitted IR from typed GEP to byte GEP. This will also help with future support for unaligned strided accesses.
---
Patch is 596.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174934.diff
121 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+1-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+7-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+7-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll (+21-21)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll (+18-18)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll (+10-7)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+15-10)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll (+5-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+317-290)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+20-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll (+6-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll (+5-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+15-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll (+15-10)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll (+12-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll (+48-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll (+56-56)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+13-7)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+19-19)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-step.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+33-33)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll (+117-117)
- (modified) llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr23997.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr35432.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr47437.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/assume.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/cse-casts.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/dead_instructions.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+13-13)
- (modified) llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+13-13)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll (+39-39)
- (modified) llvm/test/Transforms/LoopVectorize/metadata.ll (+13-13)
- (modified) llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/predicate-switch.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop.ll (+44-44)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-assume.ll (+9-6)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll (+15-9)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-inductions.ll (+6-4)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/select-cmp.ll (+15-15)
- (modified) llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll (+6-6)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b9cd322d9ec69..60365f3ca65c4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2595,7 +2595,7 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
"Expected prior simplification of recipe without offset");
Value *Ptr = State.get(getOperand(0), VPLane(0));
Value *Offset = State.get(getOffset(), true);
- Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Offset, "",
+ Value *ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), Ptr, Offset, "",
getGEPNoWrapFlags());
State.set(this, ResultPtr, /*IsScalar*/ true);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index be2a68ca40b93..5c8f7557ef4fc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -309,8 +309,14 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
VPValue *VFxPart = Builder.createOverflowingOp(
Instruction::Mul, {VF, Plan.getConstantInt(IndexTy, Part)},
{true, true});
+ VPValue *Offset = Builder.createOverflowingOp(
+ Instruction::Mul,
+ {VFxPart,
+ Plan.getConstantInt(
+ IndexTy, DL.getTypeAllocSize(VPR->getSourceElementType()))},
+ {true, true});
Copy->setOperand(0, VPR->getOperand(0));
- Copy->addOperand(VFxPart);
+ Copy->addOperand(Offset);
continue;
}
if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
index 95b4dcb23dd47..c1e29dd9d04f9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
@@ -14,7 +14,7 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ <i64 poison, i64 0>, %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> <i32 1, i32 2>
@@ -22,7 +22,7 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1))
; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1))
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 2
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16
; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP10]], align 8
; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP13]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index d7d77cb4325d4..d4b59e6839999 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -63,7 +63,7 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) {
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
-; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i64 2
+; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]])
@@ -545,11 +545,14 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP8]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp <vscale x 4 x i16> [[BROADCAST_SPLAT]] to <vscale x 4 x double>
+; DEFAULT-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP11]], 3
; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP11]], 1
+; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP14]], 3
; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 3
-; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP11]]
-; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP14]]
-; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP17]]
+; DEFAULT-NEXT: [[TMP20:%.*]] = shl nuw nsw i64 [[TMP17]], 3
+; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 [[TMP16]]
+; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 [[TMP13]]
+; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 [[TMP20]]
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[NEXT_GEP1]], align 8
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP12]], align 8
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP15]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
index f0664197dcb94..5d2bf22b946ec 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -287,7 +287,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr %
; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP0]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i64 16
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 32
; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2
; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
@@ -413,7 +413,7 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[WIDE_LOAD]] to <4 x i1>
@@ -427,7 +427,7 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP8]] to <4 x i32>
; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 4
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP12]], align 4
; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 5d550dc07ce4b..cc79e4b730364 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -37,7 +37,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]]
; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]]
-; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP11]], 3
+; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[TMP34]], i64 [[TMP19]]
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP34]], align 8
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
index 549df337e6907..ca59522a9ff35 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
@@ -128,17 +128,17 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 8
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 16
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 24
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 48
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i16>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 8
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 16
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 24
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 48
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i16>, ptr [[TMP8]], align 1
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP9]], align 1
@@ -148,9 +148,9 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[WIDE_LOAD7]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP14:%.*]] = add <8 x i16> [[WIDE_LOAD8]], [[WIDE_LOAD4]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 8
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 16
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 24
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 16
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 32
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 48
; CHECK-NEXT: store <8 x i16> [[TMP11]], ptr [[TMP15]], align 1
; CHECK-NEXT: store <8 x i16> [[TMP12]], ptr [[TMP17]], align 1
; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr [[TMP18]], align 1
@@ -237,17 +237,17 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 48
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 8
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 12
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 48
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP8]], align 1
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP9]], align 1
@@ -257,9 +257,9 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 8
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 12
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 16
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 32
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 48
; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP15]], align 1
; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP17]], align 1
; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP18]], align 1
@@ -447,9 +447,9 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[INDEX]], 4
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 8
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 12
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 48
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP8]], align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP9]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 85726c161cc54..d05a6c35f39c7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -117,7 +117,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4
; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -201,7 +201,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4
; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -285,7 +285,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10)
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10)
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP1]], align 4
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll
index feb0175e75542..9045bb7b070d6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll
@@ -21,16 +21,16 @@ define double @fp128_fmuladd_reduction(ptr %start0, ptr %start1, ptr %end0, ptr
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[START0]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[START1]], i64 [[TMP2]]
-; CHECK-N...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/174934
More information about the llvm-commits
mailing list