[llvm] [VPlan] Support scalable VFs in narrowInterleaveGroups. (PR #154842)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 21 14:08:38 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/154842
Update narrowInterleaveGroups to support scalable VFs. After the transform, each iteration of the narrowed vector loop processes a single iteration of the original vector loop for fixed-width VFs and vscale iterations for scalable VFs.
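To make the intended stepping concrete, below is a small standalone C++ sketch (not part of the patch, and not using the LLVM API; narrowedIVStep is a hypothetical helper) of the amount the canonical IV advances by after narrowing: UF for fixed-width VFs and vscale * UF for scalable VFs.

// Standalone sketch of the canonical-IV step after narrowing (hypothetical
// helper, not LLVM code). The narrowed loop covers one original vector
// iteration per trip for fixed-width VFs and vscale of them for scalable
// VFs, so the IV step becomes UF or vscale * UF respectively.
#include <cstdint>
#include <cstdio>

uint64_t narrowedIVStep(bool ScalableVF, uint64_t UF, uint64_t VScale) {
  return ScalableVF ? VScale * UF : UF;
}

int main() {
  // Example: UF = 1; vscale = 2 would correspond to 256-bit SVE registers.
  printf("fixed-width step: %llu\n", (unsigned long long)narrowedIVStep(false, 1, 0));
  printf("scalable step:    %llu\n", (unsigned long long)narrowedIVStep(true, 1, 2));
  return 0;
}

This mirrors the change in the patch below, where the increment of the canonical IV is rewritten to UF for fixed-width VFs and to a vscale * UF product materialized in the vector preheader for scalable VFs.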
From aecaceaffde40c927176095dcb8412ea5f4f4e24 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 21 Aug 2025 12:10:17 +0100
Subject: [PATCH] [VPlan] Support scalable VFs in narrowInterleaveGroups.
Update narrowInterleaveGroups to support scalable VFs. After the
transform, the vector loop will process a single iteration of the
original vector loop for fixed-width vectors and vscale iterations for
scalable vectors.
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 25 +++++++++++++------
...row-interleave-to-widen-memory-scalable.ll | 23 ++++++-----------
2 files changed, 26 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 504dd9a7c22c7..2d8ad7daf2da6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3533,12 +3533,12 @@ static bool isAlreadyNarrow(VPValue *VPV) {
void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VectorRegWidth) {
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
- if (VF.isScalable() || !VectorLoop)
+ if (!VectorLoop)
return;
VPTypeAnalysis TypeInfo(Plan);
- unsigned FixedVF = VF.getFixedValue();
+ unsigned VFMinVal = VF.getKnownMinValue();
SmallVector<VPInterleaveRecipe *> StoreGroups;
for (auto &R : *VectorLoop->getEntryBasicBlock()) {
if (isa<VPCanonicalIVPHIRecipe>(&R) ||
@@ -3574,7 +3574,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
continue;
// Bail out on non-consecutive interleave groups.
- if (!isConsecutiveInterleaveGroup(InterleaveR, FixedVF, TypeInfo,
+ if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo,
VectorRegWidth))
return;
@@ -3693,10 +3693,21 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// original iteration.
auto *CanIV = Plan.getCanonicalIV();
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
- Inc->setOperand(1, Plan.getOrAddLiveIn(ConstantInt::get(
- CanIV->getScalarType(), 1 * Plan.getUF())));
- Plan.getVF().replaceAllUsesWith(
- Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+ VPBuilder PHBuilder(Plan.getVectorPreheader());
+
+ VPValue *UF = Plan.getOrAddLiveIn(
+ ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
+ if (VF.isScalable()) {
+ VPValue *VScale = PHBuilder.createElementCount(
+ CanIV->getScalarType(), ElementCount::getScalable(1));
+ VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
+ Inc->setOperand(1, VScaleUF);
+ Plan.getVF().replaceAllUsesWith(VScale);
+ } else {
+ Inc->setOperand(1, UF);
+ Plan.getVF().replaceAllUsesWith(
+ Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+ }
removeDeadRecipes(Plan);
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
index 7533636f9d41c..46ba7f645a03e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
@@ -16,18 +16,15 @@ define void @load_store_interleave_group(ptr noalias %data) {
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
-; CHECK-NEXT: store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -69,20 +66,16 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1111, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1111, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP0]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x double>, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = fneg <vscale x 2 x double> [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = load <vscale x 2 x double>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = fneg <vscale x 2 x double> [[TMP7]]
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]])
-; CHECK-NEXT: store <vscale x 4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: store <vscale x 2 x double> [[TMP9]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]: