[llvm] 465b17c - [VPlan] Support scalable VFs in narrowInterleaveGroups. (#154842)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 31 12:45:10 PDT 2025
Author: Florian Hahn
Date: 2025-08-31T20:45:07+01:00
New Revision: 465b17c45007b3030f918098cc801f065508e732
URL: https://github.com/llvm/llvm-project/commit/465b17c45007b3030f918098cc801f065508e732
DIFF: https://github.com/llvm/llvm-project/commit/465b17c45007b3030f918098cc801f065508e732.diff
LOG: [VPlan] Support scalable VFs in narrowInterleaveGroups. (#154842)
Update narrowInterleaveGroups to support scalable VFs. After the
transform, each iteration of the new vector loop processes a single
iteration of the original vector loop for fixed-width VFs, and vscale
iterations for scalable VFs.
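
As a minimal standalone sketch of the step computation described above
(plain C++, not LLVM API; all names are illustrative, and RuntimeVScale
stands in for the value of llvm.vscale() at run time):

#include <cstdint>
#include <cstdio>

// After narrowing, each vector-loop iteration covers one original vector
// iteration for fixed-width VFs, or vscale of them for scalable VFs, so
// the canonical IV step becomes 1 * UF or vscale * UF respectively.
uint64_t narrowedIVStep(bool ScalableVF, uint64_t UF, uint64_t RuntimeVScale) {
  return (ScalableVF ? RuntimeVScale : 1) * UF;
}

int main() {
  // E.g. UF = 1 on a 256-bit SVE target, where vscale = 2:
  printf("fixed-width step: %llu\n",
         (unsigned long long)narrowedIVStep(false, 1, 2));
  printf("scalable step:    %llu\n",
         (unsigned long long)narrowedIVStep(true, 1, 2));
  return 0;
}

For scalable VFs the step is only known at run time, which is why the
diff below emits a vscale * UF multiply in the vector preheader rather
than a constant increment.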
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6c5f9b7302292..8d376557553c0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3615,12 +3615,12 @@ static bool isAlreadyNarrow(VPValue *VPV) {
void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VectorRegWidth) {
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
- if (VF.isScalable() || !VectorLoop)
+ if (!VectorLoop)
return;
VPTypeAnalysis TypeInfo(Plan);
- unsigned FixedVF = VF.getFixedValue();
+ unsigned VFMinVal = VF.getKnownMinValue();
SmallVector<VPInterleaveRecipe *> StoreGroups;
for (auto &R : *VectorLoop->getEntryBasicBlock()) {
if (isa<VPCanonicalIVPHIRecipe>(&R) ||
@@ -3656,7 +3656,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
continue;
// Bail out on non-consecutive interleave groups.
- if (!isConsecutiveInterleaveGroup(InterleaveR, FixedVF, TypeInfo,
+ if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo,
VectorRegWidth))
return;
@@ -3775,10 +3775,21 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// original iteration.
auto *CanIV = Plan.getCanonicalIV();
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
- Inc->setOperand(1, Plan.getOrAddLiveIn(ConstantInt::get(
- CanIV->getScalarType(), 1 * Plan.getUF())));
- Plan.getVF().replaceAllUsesWith(
- Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+ VPBuilder PHBuilder(Plan.getVectorPreheader());
+
+ VPValue *UF = Plan.getOrAddLiveIn(
+ ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
+ if (VF.isScalable()) {
+ VPValue *VScale = PHBuilder.createElementCount(
+ CanIV->getScalarType(), ElementCount::getScalable(1));
+ VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
+ Inc->setOperand(1, VScaleUF);
+ Plan.getVF().replaceAllUsesWith(VScale);
+ } else {
+ Inc->setOperand(1, UF);
+ Plan.getVF().replaceAllUsesWith(
+ Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+ }
removeDeadRecipes(Plan);
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
index 7533636f9d41c..46ba7f645a03e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
@@ -16,18 +16,15 @@ define void @load_store_interleave_group(ptr noalias %data) {
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
-; CHECK-NEXT: store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -69,20 +66,16 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1111, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1111, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x double>, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = fneg <vscale x 2 x double> [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = load <vscale x 2 x double>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = fneg <vscale x 2 x double> [[TMP7]]
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]])
-; CHECK-NEXT: store <vscale x 4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: store <vscale x 2 x double> [[TMP9]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]: