[llvm] [LV] Ignore user-specified interleave count when unsafe. (PR #153009)
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 05:53:41 PDT 2025
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/153009
>From 938a0e31a231ec4715821a3ba5d1dcfc83723533 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 7 Aug 2025 13:53:14 +0000
Subject: [PATCH 1/5] [LV] Ignore user-specified interleave count when unsafe.
When a VF is specified via a loop hint, it will be clamped to a
safe VF or ignored if it is found to be unsafe. This is not the
case for user-specified interleave counts, which can lead to
loops such as the following with a memory dependence being
vectorised with the specified IC:
#pragma clang loop interleave_count(4)
for (int i = 4; i < LEN; i++)
b[i] = b[i - 4] + a[i];
According to [1], loop hints are ignored if they are not safe to apply.
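This patch adds a check so that a user-specified interleave count is
ignored (the loop may still be vectorised, but is not interleaved by the
requested count) if isSafeForAnyVectorWidth() returns false.
selectInterleaveCount() already performs this check for the interleave
count that it computes itself.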
[1] https://llvm.org/docs/LangRef.html#llvm-loop-vectorize-and-llvm-loop-interleave
---
.../Transforms/Vectorize/LoopVectorize.cpp | 22 +++++++++----
.../AArch64/scalable-reductions.ll | 13 +++-----
.../LoopVectorize/unsafe-ic-hint-remark.ll | 33 +++++++++++++++++++
3 files changed, 53 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0b7963b98e7a4..38fecec6766c8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9844,8 +9844,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
+ unsigned SafeUserIC = CM.Legal->isSafeForAnyVectorWidth() ? UserIC : 0;
+
// Plan how to best vectorize.
- LVP.plan(UserVF, UserIC);
+ LVP.plan(UserVF, SafeUserIC);
VectorizationFactor VF = LVP.computeBestVF();
unsigned IC = 1;
@@ -9857,7 +9859,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Select the interleave count.
IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
- unsigned SelectedIC = std::max(IC, UserIC);
+ unsigned SelectedIC = std::max(IC, SafeUserIC);
+
// Optimistically generate runtime checks if they are needed. Drop them if
// they turn out to not be profitable.
if (VF.Width.isVector() || SelectedIC > 1) {
@@ -9907,7 +9910,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
+ if (UserIC > 0 && UserIC != SafeUserIC) {
+ LLVM_DEBUG(dbgs() << "LV: Disabling interleaving as user-specified "
+ "interleave count is unsafe.\n");
+ IntDiagMsg = {"InterleavingUnsafe",
+ "User-specified interleave count is not safe, interleave "
+ "count is set to 1."};
+ InterleaveLoop = false;
+ } else if (!LVP.hasPlanWithVF(VF.Width) && SafeUserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -9915,7 +9925,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
IntDiagMsg = {"InterleavingAvoided",
"Ignoring UserIC, because interleaving was avoided up front"};
InterleaveLoop = false;
- } else if (IC == 1 && UserIC <= 1) {
+ } else if (IC == 1 && SafeUserIC <= 1) {
// Tell the user interleaving is not beneficial.
LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
IntDiagMsg = {
@@ -9927,7 +9937,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
IntDiagMsg.second +=
" and is explicitly disabled or interleave count is set to 1";
}
- } else if (IC > 1 && UserIC == 1) {
+ } else if (IC > 1 && SafeUserIC == 1) {
// Tell the user interleaving is beneficial, but it explicitly disabled.
LLVM_DEBUG(dbgs() << "LV: Interleaving is beneficial but is explicitly "
"disabled.\n");
@@ -9951,7 +9961,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
// Override IC if user provided an interleave count.
- IC = UserIC > 0 ? UserIC : IC;
+ IC = SafeUserIC > 0 ? SafeUserIC : IC;
// Emit diagnostic messages, if any.
const char *VAPassName = Hints.vectorizeAnalysisPassName();
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index 11cc971586773..f1fc78f117fba 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -417,21 +417,16 @@ for.end: ; preds = %for.body, %entry
; Note: This test was added to ensure we always check the legality of reductions (end emit a warning if necessary) before checking for memory dependencies
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
-; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
+; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 1)
define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @memory_dependence
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <4 x i32>
; CHECK: %[[LOAD2:.*]] = load <4 x i32>
-; CHECK: %[[LOAD3:.*]] = load <4 x i32>
-; CHECK: %[[LOAD4:.*]] = load <4 x i32>
-; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD3]], %[[LOAD1]]
-; CHECK: %[[ADD2:.*]] = add nsw <4 x i32> %[[LOAD4]], %[[LOAD2]]
-; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD3]]
-; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD4]]
+; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD2]], %[[LOAD1]]
+; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD2]]
; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]]
-; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]])
+; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[MUL1]])
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
new file mode 100644
index 0000000000000..034df3f54e7e5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+; Make sure the unsafe user specified interleave count is ignored.
+
+; CHECK: LV: Disabling interleaving as user-specified interleave count is unsafe.
+; CHECK: remark: <unknown>:0:0: User-specified interleave count is not safe, interleave count is set to 1.
+; CHECK-LABEL: @loop_distance_4
+define void @loop_distance_4(i64 %N, ptr %a, ptr %b) {
+entry:
+ %cmp10 = icmp sgt i64 %N, 4
+ br i1 %cmp10, label %for.body, label %for.end
+
+for.body:
+ %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = getelementptr i32, ptr %b, i64 %indvars.iv
+ %arrayidx = getelementptr i8, ptr %0, i64 -16
+ %1 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds nuw i32, ptr %a, i64 %indvars.iv
+ %2 = load i32, ptr %arrayidx2, align 4
+ %add = add nsw i32 %2, %1
+ store i32 %add, ptr %0, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+ ret void
+}
+
+!1 = !{!1, !2, !3}
+!2 = !{!"llvm.loop.interleave.count", i32 4}
+!3 = !{!"llvm.loop.vectorize.width", i32 4}
>From 31c6578bfd76cc6cf69ecd02489543ea17819f55 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Mon, 11 Aug 2025 14:54:13 +0000
Subject: [PATCH 2/5] - Reworded diagnostic message - Removed need for asserts
in new test
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 +++----
.../LoopVectorize/AArch64/scalable-reductions.ll | 1 +
.../test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll | 6 ++----
3 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 38fecec6766c8..7e8c50852ff96 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9911,11 +9911,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
if (UserIC > 0 && UserIC != SafeUserIC) {
- LLVM_DEBUG(dbgs() << "LV: Disabling interleaving as user-specified "
- "interleave count is unsafe.\n");
+ LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
IntDiagMsg = {"InterleavingUnsafe",
- "User-specified interleave count is not safe, interleave "
- "count is set to 1."};
+ "Ignoring user-specified interleave count due to possibly "
+ "unsafe dependencies in the loop."};
InterleaveLoop = false;
} else if (!LVP.hasPlanWithVF(VF.Width) && SafeUserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index f1fc78f117fba..fb7890a3b82f4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -417,6 +417,7 @@ for.end: ; preds = %for.body, %entry
; Note: This test was added to ensure we always check the legality of reductions (end emit a warning if necessary) before checking for memory dependencies
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
+; CHECK-REMARK: Ignoring user-specified interleave count due to possibly unsafe dependencies in the loop.
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 1)
define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @memory_dependence
diff --git a/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
index 034df3f54e7e5..f2fb7a240bc9e 100644
--- a/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
+++ b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
@@ -1,10 +1,8 @@
-; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -S < %s 2>&1 | FileCheck %s
; Make sure the unsafe user specified interleave count is ignored.
-; CHECK: LV: Disabling interleaving as user-specified interleave count is unsafe.
-; CHECK: remark: <unknown>:0:0: User-specified interleave count is not safe, interleave count is set to 1.
+; CHECK: remark: <unknown>:0:0: Ignoring user-specified interleave count due to possibly unsafe dependencies in the loop.
; CHECK-LABEL: @loop_distance_4
define void @loop_distance_4(i64 %N, ptr %a, ptr %b) {
entry:
>From 20fe702e2e84da337152139581ba5bf061b36751 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 12 Aug 2025 10:01:23 +0000
Subject: [PATCH 3/5] - Handle UserIC as part of selectInterleaveCount
---
.../Vectorize/LoopVectorizationPlanner.h | 4 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 72 ++++++++++---------
2 files changed, 40 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 456fa4c858535..ddf8b1054bf49 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -517,8 +517,8 @@ class LoopVectorizationPlanner {
/// If interleave count has been specified by metadata it will be returned.
/// Otherwise, the interleave count is computed and returned. VF and LoopCost
/// are the selected vectorization factor and the cost of the selected VF.
- unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
- InstructionCost LoopCost);
+ unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, unsigned UserIC,
+ InstructionCost LoopCost, bool &IntBeneficial);
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7e8c50852ff96..3a9cbfca91fca 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4501,9 +4501,9 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
}
}
-unsigned
-LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
- InstructionCost LoopCost) {
+unsigned LoopVectorizationPlanner::selectInterleaveCount(
+ VPlan &Plan, ElementCount VF, unsigned UserIC, InstructionCost LoopCost,
+ bool &IntBeneficial) {
// -- The interleave heuristics --
// We interleave the loop in order to expose ILP and reduce the loop overhead.
// There are many micro-architectural considerations that we can't predict
@@ -4518,25 +4518,26 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// 3. We don't interleave if we think that we will spill registers to memory
// due to the increased register pressure.
- if (!CM.isScalarEpilogueAllowed())
+ // We used the distance for the interleave count. This should not be overriden
+ // by a user-specified IC.
+ if (!Legal->isSafeForAnyVectorWidth())
return 1;
+ if (!CM.isScalarEpilogueAllowed())
+ return std::max(1U, UserIC);
+
if (any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
IsaPred<VPEVLBasedIVPHIRecipe>)) {
LLVM_DEBUG(dbgs() << "LV: Preference for VP intrinsics indicated. "
"Unroll factor forced to be 1.\n");
- return 1;
+ return std::max(1U, UserIC);
}
- // We used the distance for the interleave count.
- if (!Legal->isSafeForAnyVectorWidth())
- return 1;
-
// We don't attempt to perform interleaving for loops with uncountable early
// exits because the VPInstruction::AnyOf code cannot currently handle
// multiple parts.
if (Plan.hasEarlyExit())
- return 1;
+ return std::max(1U, UserIC);
const bool HasReductions =
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
@@ -4553,7 +4554,7 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// Loop body is free and there is no need for interleaving.
if (LoopCost == 0)
- return 1;
+ return std::max(1U, UserIC);
}
VPRegisterUsage R =
@@ -4690,7 +4691,8 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// benefit from interleaving.
if (VF.isVector() && HasReductions) {
LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");
- return IC;
+ IntBeneficial = IC > 1;
+ return UserIC > 0 ? UserIC : IC;
}
// For any scalar loop that either requires runtime checks or predication we
@@ -4773,7 +4775,7 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
});
if (HasSelectCmpReductions) {
LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
- return 1;
+ return std::max(1U, UserIC);
}
// If we have a scalar reduction (vector reductions are already dealt with
@@ -4792,7 +4794,7 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
if (HasOrderedReductions) {
LLVM_DEBUG(
dbgs() << "LV: Not interleaving scalar ordered reductions.\n");
- return 1;
+ return std::max(1U, UserIC);
}
unsigned F = MaxNestedScalarReductionIC;
@@ -4805,7 +4807,9 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
std::max(StoresIC, LoadsIC) > SmallIC) {
LLVM_DEBUG(
dbgs() << "LV: Interleaving to saturate store or load ports.\n");
- return std::max(StoresIC, LoadsIC);
+ IC = std::max(StoresIC, LoadsIC);
+ IntBeneficial = IC > 1;
+ return UserIC > 0 ? UserIC : IC;
}
// If there are scalar reductions and TTI has enabled aggressive
@@ -4814,22 +4818,27 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
// Interleave no less than SmallIC but not as aggressive as the normal IC
// to satisfy the rare situation when resources are too limited.
- return std::max(IC / 2, SmallIC);
+ IC = std::max(IC / 2, SmallIC);
+ IntBeneficial = IC > 1;
+ return UserIC > 0 ? UserIC : IC;
}
LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
- return SmallIC;
+ IC = std::max(SmallIC, UserIC);
+ IntBeneficial = IC > 1;
+ return UserIC > 0 ? UserIC : IC;
}
// Interleave if this is a large loop (small loops are already dealt with by
// this point) that could benefit from interleaving.
if (AggressivelyInterleaveReductions) {
LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
- return IC;
+ IntBeneficial = IC > 1;
+ return UserIC > 0 ? UserIC : IC;
}
LLVM_DEBUG(dbgs() << "LV: Not Interleaving.\n");
- return 1;
+ return std::max(1U, UserIC);
}
bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
@@ -9844,10 +9853,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
- unsigned SafeUserIC = CM.Legal->isSafeForAnyVectorWidth() ? UserIC : 0;
-
// Plan how to best vectorize.
- LVP.plan(UserVF, SafeUserIC);
+ LVP.plan(UserVF, UserIC);
VectorizationFactor VF = LVP.computeBestVF();
unsigned IC = 1;
@@ -9855,16 +9862,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LVP.emitInvalidCostRemarks(ORE);
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
+ bool IntBeneficial = false;
if (LVP.hasPlanWithVF(VF.Width)) {
// Select the interleave count.
- IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
-
- unsigned SelectedIC = std::max(IC, SafeUserIC);
+ IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, UserIC,
+ VF.Cost, IntBeneficial);
// Optimistically generate runtime checks if they are needed. Drop them if
// they turn out to not be profitable.
- if (VF.Width.isVector() || SelectedIC > 1) {
- Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+ if (VF.Width.isVector() || IC > 1) {
+ Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, IC);
// Bail out early if either the SCEV or memory runtime checks are known to
// fail. In that case, the vector loop would never execute.
@@ -9910,13 +9917,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (UserIC > 0 && UserIC != SafeUserIC) {
+ if (IC == 1 && UserIC > 1) {
LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
IntDiagMsg = {"InterleavingUnsafe",
"Ignoring user-specified interleave count due to possibly "
"unsafe dependencies in the loop."};
InterleaveLoop = false;
- } else if (!LVP.hasPlanWithVF(VF.Width) && SafeUserIC > 1) {
+ } else if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -9924,7 +9931,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
IntDiagMsg = {"InterleavingAvoided",
"Ignoring UserIC, because interleaving was avoided up front"};
InterleaveLoop = false;
- } else if (IC == 1 && SafeUserIC <= 1) {
+ } else if (!IntBeneficial && UserIC <= 1) {
// Tell the user interleaving is not beneficial.
LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
IntDiagMsg = {
@@ -9936,7 +9943,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
IntDiagMsg.second +=
" and is explicitly disabled or interleave count is set to 1";
}
- } else if (IC > 1 && SafeUserIC == 1) {
+ } else if (IntBeneficial && UserIC == 1) {
// Tell the user interleaving is beneficial, but it explicitly disabled.
LLVM_DEBUG(dbgs() << "LV: Interleaving is beneficial but is explicitly "
"disabled.\n");
@@ -9959,9 +9966,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InterleaveLoop = false;
}
- // Override IC if user provided an interleave count.
- IC = SafeUserIC > 0 ? SafeUserIC : IC;
-
// Emit diagnostic messages, if any.
const char *VAPassName = Hints.vectorizeAnalysisPassName();
if (!VectorizeLoop && !InterleaveLoop) {
>From 17eda9450a6c5d61fcf93a696893d4f40edf1c1a Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 9 Oct 2025 10:20:29 +0000
Subject: [PATCH 4/5] - Moved UserIC back out of selectInterleaveCount
---
.../Vectorize/LoopVectorizationPlanner.h | 4 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 67 +++++++++----------
2 files changed, 32 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ddf8b1054bf49..456fa4c858535 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -517,8 +517,8 @@ class LoopVectorizationPlanner {
/// If interleave count has been specified by metadata it will be returned.
/// Otherwise, the interleave count is computed and returned. VF and LoopCost
/// are the selected vectorization factor and the cost of the selected VF.
- unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, unsigned UserIC,
- InstructionCost LoopCost, bool &IntBeneficial);
+ unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
+ InstructionCost LoopCost);
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3a9cbfca91fca..1bf6529fc4011 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4501,9 +4501,9 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
}
}
-unsigned LoopVectorizationPlanner::selectInterleaveCount(
- VPlan &Plan, ElementCount VF, unsigned UserIC, InstructionCost LoopCost,
- bool &IntBeneficial) {
+unsigned
+LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
+ InstructionCost LoopCost) {
// -- The interleave heuristics --
// We interleave the loop in order to expose ILP and reduce the loop overhead.
// There are many micro-architectural considerations that we can't predict
@@ -4518,26 +4518,25 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
// 3. We don't interleave if we think that we will spill registers to memory
// due to the increased register pressure.
- // We used the distance for the interleave count. This should not be overriden
- // by a user-specified IC.
- if (!Legal->isSafeForAnyVectorWidth())
- return 1;
-
if (!CM.isScalarEpilogueAllowed())
- return std::max(1U, UserIC);
+ return 1;
if (any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
IsaPred<VPEVLBasedIVPHIRecipe>)) {
LLVM_DEBUG(dbgs() << "LV: Preference for VP intrinsics indicated. "
"Unroll factor forced to be 1.\n");
- return std::max(1U, UserIC);
+ return 1;
}
+ // We used the distance for the interleave count.
+ if (!Legal->isSafeForAnyVectorWidth())
+ return 1;
+
// We don't attempt to perform interleaving for loops with uncountable early
// exits because the VPInstruction::AnyOf code cannot currently handle
// multiple parts.
if (Plan.hasEarlyExit())
- return std::max(1U, UserIC);
+ return 1;
const bool HasReductions =
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
@@ -4554,7 +4553,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
// Loop body is free and there is no need for interleaving.
if (LoopCost == 0)
- return std::max(1U, UserIC);
+ return 1;
}
VPRegisterUsage R =
@@ -4691,8 +4690,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
// benefit from interleaving.
if (VF.isVector() && HasReductions) {
LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");
- IntBeneficial = IC > 1;
- return UserIC > 0 ? UserIC : IC;
+ return IC;
}
// For any scalar loop that either requires runtime checks or predication we
@@ -4775,7 +4773,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
});
if (HasSelectCmpReductions) {
LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
- return std::max(1U, UserIC);
+ return 1;
}
// If we have a scalar reduction (vector reductions are already dealt with
@@ -4794,7 +4792,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
if (HasOrderedReductions) {
LLVM_DEBUG(
dbgs() << "LV: Not interleaving scalar ordered reductions.\n");
- return std::max(1U, UserIC);
+ return 1;
}
unsigned F = MaxNestedScalarReductionIC;
@@ -4807,9 +4805,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
std::max(StoresIC, LoadsIC) > SmallIC) {
LLVM_DEBUG(
dbgs() << "LV: Interleaving to saturate store or load ports.\n");
- IC = std::max(StoresIC, LoadsIC);
- IntBeneficial = IC > 1;
- return UserIC > 0 ? UserIC : IC;
+ return std::max(StoresIC, LoadsIC);
}
// If there are scalar reductions and TTI has enabled aggressive
@@ -4818,27 +4814,22 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
// Interleave no less than SmallIC but not as aggressive as the normal IC
// to satisfy the rare situation when resources are too limited.
- IC = std::max(IC / 2, SmallIC);
- IntBeneficial = IC > 1;
- return UserIC > 0 ? UserIC : IC;
+ return std::max(IC / 2, SmallIC);
}
LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
- IC = std::max(SmallIC, UserIC);
- IntBeneficial = IC > 1;
- return UserIC > 0 ? UserIC : IC;
+ return SmallIC;
}
// Interleave if this is a large loop (small loops are already dealt with by
// this point) that could benefit from interleaving.
if (AggressivelyInterleaveReductions) {
LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
- IntBeneficial = IC > 1;
- return UserIC > 0 ? UserIC : IC;
+ return IC;
}
LLVM_DEBUG(dbgs() << "LV: Not Interleaving.\n");
- return std::max(1U, UserIC);
+ return 1;
}
bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
@@ -9851,7 +9842,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
- unsigned UserIC = Hints.getInterleave();
+ unsigned UserIC = LVL.isSafeForAnyVectorWidth() ? Hints.getInterleave() : 1;
// Plan how to best vectorize.
LVP.plan(UserVF, UserIC);
@@ -9862,16 +9853,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LVP.emitInvalidCostRemarks(ORE);
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
- bool IntBeneficial = false;
if (LVP.hasPlanWithVF(VF.Width)) {
// Select the interleave count.
- IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, UserIC,
- VF.Cost, IntBeneficial);
+ IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
+ unsigned SelectedIC = std::max(IC, UserIC);
// Optimistically generate runtime checks if they are needed. Drop them if
// they turn out to not be profitable.
- if (VF.Width.isVector() || IC > 1) {
- Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, IC);
+ if (VF.Width.isVector() || SelectedIC > 1) {
+ Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
// Bail out early if either the SCEV or memory runtime checks are known to
// fail. In that case, the vector loop would never execute.
@@ -9917,7 +9907,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (IC == 1 && UserIC > 1) {
+ if (UserIC == 1 && Hints.getInterleave() > 1) {
LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
IntDiagMsg = {"InterleavingUnsafe",
"Ignoring user-specified interleave count due to possibly "
@@ -9931,7 +9921,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
IntDiagMsg = {"InterleavingAvoided",
"Ignoring UserIC, because interleaving was avoided up front"};
InterleaveLoop = false;
- } else if (!IntBeneficial && UserIC <= 1) {
+ } else if (IC == 1 && UserIC <= 1) {
// Tell the user interleaving is not beneficial.
LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
IntDiagMsg = {
@@ -9943,7 +9933,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
IntDiagMsg.second +=
" and is explicitly disabled or interleave count is set to 1";
}
- } else if (IntBeneficial && UserIC == 1) {
+ } else if (IC > 1 && UserIC == 1) {
// Tell the user interleaving is beneficial, but it explicitly disabled.
LLVM_DEBUG(dbgs() << "LV: Interleaving is beneficial but is explicitly "
"disabled.\n");
@@ -9966,6 +9956,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InterleaveLoop = false;
}
+ // Override IC if user provided an interleave count.
+ IC = UserIC > 0 ? UserIC : IC;
+
// Emit diagnostic messages, if any.
const char *VAPassName = Hints.vectorizeAnalysisPassName();
if (!VectorizeLoop && !InterleaveLoop) {
>From 444e58cc075d5416fce5a6875fb32d9d403a8472 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 21 Oct 2025 12:44:44 +0000
Subject: [PATCH 5/5] - Only set UserIC to 1 if an interleave count > 1 was
requested
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1bf6529fc4011..3cae917643b45 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9842,7 +9842,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
- unsigned UserIC = LVL.isSafeForAnyVectorWidth() ? Hints.getInterleave() : 1;
+ unsigned UserIC = Hints.getInterleave();
+ if (UserIC > 1 && !LVL.isSafeForAnyVectorWidth())
+ UserIC = 1;
// Plan how to best vectorize.
LVP.plan(UserVF, UserIC);
More information about the llvm-commits
mailing list