[llvm] [LV] Ignore user-specified interleave count when unsafe. (PR #153009)

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 21 08:16:18 PDT 2025


https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/153009

>From 938a0e31a231ec4715821a3ba5d1dcfc83723533 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 7 Aug 2025 13:53:14 +0000
Subject: [PATCH 1/6] [LV] Ignore user-specified interleave count when unsafe.

When a VF is specified via a loop hint, it will be clamped to a
safe VF or ignored if it is found to be unsafe. This is not the
case for user-specified interleave counts, which can lead to
loops with a memory dependence, such as the following, being
vectorised with the specified IC:

  #pragma clang loop interleave_count(4)
  for (int i = 4; i < LEN; i++)
      b[i] = b[i - 4] + a[i];

According to [1], loop hints are ignored if they are not safe to apply.

This patch adds a check so that a user-specified interleave count is
ignored if isSafeForAnyVectorWidth() returns false. The same check is
already applied in selectInterleaveCount().

[1] https://llvm.org/docs/LangRef.html#llvm-loop-vectorize-and-llvm-loop-interleave
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 22 +++++++++----
 .../AArch64/scalable-reductions.ll            | 13 +++-----
 .../LoopVectorize/unsafe-ic-hint-remark.ll    | 33 +++++++++++++++++++
 3 files changed, 53 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0b7963b98e7a4..38fecec6766c8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9844,8 +9844,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   ElementCount UserVF = Hints.getWidth();
   unsigned UserIC = Hints.getInterleave();
 
+  unsigned SafeUserIC = CM.Legal->isSafeForAnyVectorWidth() ? UserIC : 0;
+
   // Plan how to best vectorize.
-  LVP.plan(UserVF, UserIC);
+  LVP.plan(UserVF, SafeUserIC);
   VectorizationFactor VF = LVP.computeBestVF();
   unsigned IC = 1;
 
@@ -9857,7 +9859,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     // Select the interleave count.
     IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
 
-    unsigned SelectedIC = std::max(IC, UserIC);
+    unsigned SelectedIC = std::max(IC, SafeUserIC);
+
     //  Optimistically generate runtime checks if they are needed. Drop them if
     //  they turn out to not be profitable.
     if (VF.Width.isVector() || SelectedIC > 1) {
@@ -9907,7 +9910,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     VectorizeLoop = false;
   }
 
-  if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
+  if (UserIC > 0 && UserIC != SafeUserIC) {
+    LLVM_DEBUG(dbgs() << "LV: Disabling interleaving as user-specified "
+                         "interleave count is unsafe.\n");
+    IntDiagMsg = {"InterleavingUnsafe",
+                  "User-specified interleave count is not safe, interleave "
+                  "count is set to 1."};
+    InterleaveLoop = false;
+  } else if (!LVP.hasPlanWithVF(VF.Width) && SafeUserIC > 1) {
     // Tell the user interleaving was avoided up-front, despite being explicitly
     // requested.
     LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -9915,7 +9925,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     IntDiagMsg = {"InterleavingAvoided",
                   "Ignoring UserIC, because interleaving was avoided up front"};
     InterleaveLoop = false;
-  } else if (IC == 1 && UserIC <= 1) {
+  } else if (IC == 1 && SafeUserIC <= 1) {
     // Tell the user interleaving is not beneficial.
     LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
     IntDiagMsg = {
@@ -9927,7 +9937,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
       IntDiagMsg.second +=
           " and is explicitly disabled or interleave count is set to 1";
     }
-  } else if (IC > 1 && UserIC == 1) {
+  } else if (IC > 1 && SafeUserIC == 1) {
     // Tell the user interleaving is beneficial, but it explicitly disabled.
     LLVM_DEBUG(dbgs() << "LV: Interleaving is beneficial but is explicitly "
                          "disabled.\n");
@@ -9951,7 +9961,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   }
 
   // Override IC if user provided an interleave count.
-  IC = UserIC > 0 ? UserIC : IC;
+  IC = SafeUserIC > 0 ? SafeUserIC : IC;
 
   // Emit diagnostic messages, if any.
   const char *VAPassName = Hints.vectorizeAnalysisPassName();
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index 11cc971586773..f1fc78f117fba 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -417,21 +417,16 @@ for.end:                                 ; preds = %for.body, %entry
 
 ; Note: This test was added to ensure we always check the legality of reductions (end emit a warning if necessary) before checking for memory dependencies
 ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
-; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
+; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 1)
 define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @memory_dependence
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD1:.*]] = load <4 x i32>
 ; CHECK: %[[LOAD2:.*]] = load <4 x i32>
-; CHECK: %[[LOAD3:.*]] = load <4 x i32>
-; CHECK: %[[LOAD4:.*]] = load <4 x i32>
-; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD3]], %[[LOAD1]]
-; CHECK: %[[ADD2:.*]] = add nsw <4 x i32> %[[LOAD4]], %[[LOAD2]]
-; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD3]]
-; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD4]]
+; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD2]], %[[LOAD1]]
+; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD2]]
 ; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]]
-; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]])
+; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[MUL1]])
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
new file mode 100644
index 0000000000000..034df3f54e7e5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+; Make sure the unsafe user specified interleave count is ignored.
+
+; CHECK: LV: Disabling interleaving as user-specified interleave count is unsafe.
+; CHECK: remark: <unknown>:0:0: User-specified interleave count is not safe, interleave count is set to 1.
+; CHECK-LABEL: @loop_distance_4
+define void @loop_distance_4(i64 %N, ptr %a, ptr %b) {
+entry:
+  %cmp10 = icmp sgt i64 %N, 4
+  br i1 %cmp10, label %for.body, label %for.end
+
+for.body:
+  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = getelementptr i32, ptr %b, i64 %indvars.iv
+  %arrayidx = getelementptr i8, ptr %0, i64 -16
+  %1 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds nuw i32, ptr %a, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %2, %1
+  store i32 %add, ptr %0, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+  ret void
+}
+
+!1 = !{!1, !2, !3}
+!2 = !{!"llvm.loop.interleave.count", i32 4}
+!3 = !{!"llvm.loop.vectorize.width", i32 4}

>From 31c6578bfd76cc6cf69ecd02489543ea17819f55 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Mon, 11 Aug 2025 14:54:13 +0000
Subject: [PATCH 2/6] - Reworded diagnostic message - Removed need for asserts
 in new test

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp            | 7 +++----
 .../LoopVectorize/AArch64/scalable-reductions.ll           | 1 +
 .../test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll | 6 ++----
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 38fecec6766c8..7e8c50852ff96 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9911,11 +9911,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   }
 
   if (UserIC > 0 && UserIC != SafeUserIC) {
-    LLVM_DEBUG(dbgs() << "LV: Disabling interleaving as user-specified "
-                         "interleave count is unsafe.\n");
+    LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
     IntDiagMsg = {"InterleavingUnsafe",
-                  "User-specified interleave count is not safe, interleave "
-                  "count is set to 1."};
+                  "Ignoring user-specified interleave count due to possibly "
+                  "unsafe dependencies in the loop."};
     InterleaveLoop = false;
   } else if (!LVP.hasPlanWithVF(VF.Width) && SafeUserIC > 1) {
     // Tell the user interleaving was avoided up-front, despite being explicitly
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index f1fc78f117fba..fb7890a3b82f4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -417,6 +417,7 @@ for.end:                                 ; preds = %for.body, %entry
 
 ; Note: This test was added to ensure we always check the legality of reductions (end emit a warning if necessary) before checking for memory dependencies
 ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
+; CHECK-REMARK: Ignoring user-specified interleave count due to possibly unsafe dependencies in the loop.
 ; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 1)
 define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @memory_dependence
diff --git a/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
index 034df3f54e7e5..f2fb7a240bc9e 100644
--- a/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
+++ b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
@@ -1,10 +1,8 @@
-; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -S < %s 2>&1 | FileCheck %s
 
 ; Make sure the unsafe user specified interleave count is ignored.
 
-; CHECK: LV: Disabling interleaving as user-specified interleave count is unsafe.
-; CHECK: remark: <unknown>:0:0: User-specified interleave count is not safe, interleave count is set to 1.
+; CHECK: remark: <unknown>:0:0: Ignoring user-specified interleave count due to possibly unsafe dependencies in the loop.
 ; CHECK-LABEL: @loop_distance_4
 define void @loop_distance_4(i64 %N, ptr %a, ptr %b) {
 entry:

>From 20fe702e2e84da337152139581ba5bf061b36751 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 12 Aug 2025 10:01:23 +0000
Subject: [PATCH 3/6] - Handle UserIC as part of selectInterleaveCount

---
 .../Vectorize/LoopVectorizationPlanner.h      |  4 +-
 .../Transforms/Vectorize/LoopVectorize.cpp    | 72 ++++++++++---------
 2 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 456fa4c858535..ddf8b1054bf49 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -517,8 +517,8 @@ class LoopVectorizationPlanner {
   /// If interleave count has been specified by metadata it will be returned.
   /// Otherwise, the interleave count is computed and returned. VF and LoopCost
   /// are the selected vectorization factor and the cost of the selected VF.
-  unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
-                                 InstructionCost LoopCost);
+  unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, unsigned UserIC,
+                                 InstructionCost LoopCost, bool &IntBeneficial);
 
   /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
   /// according to the best selected \p VF and  \p UF.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7e8c50852ff96..3a9cbfca91fca 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4501,9 +4501,9 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
   }
 }
 
-unsigned
-LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
-                                                InstructionCost LoopCost) {
+unsigned LoopVectorizationPlanner::selectInterleaveCount(
+    VPlan &Plan, ElementCount VF, unsigned UserIC, InstructionCost LoopCost,
+    bool &IntBeneficial) {
   // -- The interleave heuristics --
   // We interleave the loop in order to expose ILP and reduce the loop overhead.
   // There are many micro-architectural considerations that we can't predict
@@ -4518,25 +4518,26 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
   // 3. We don't interleave if we think that we will spill registers to memory
   // due to the increased register pressure.
 
-  if (!CM.isScalarEpilogueAllowed())
+  // We used the distance for the interleave count. This should not be overriden
+  // by a user-specified IC.
+  if (!Legal->isSafeForAnyVectorWidth())
     return 1;
 
+  if (!CM.isScalarEpilogueAllowed())
+    return std::max(1U, UserIC);
+
   if (any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
              IsaPred<VPEVLBasedIVPHIRecipe>)) {
     LLVM_DEBUG(dbgs() << "LV: Preference for VP intrinsics indicated. "
                          "Unroll factor forced to be 1.\n");
-    return 1;
+    return std::max(1U, UserIC);
   }
 
-  // We used the distance for the interleave count.
-  if (!Legal->isSafeForAnyVectorWidth())
-    return 1;
-
   // We don't attempt to perform interleaving for loops with uncountable early
   // exits because the VPInstruction::AnyOf code cannot currently handle
   // multiple parts.
   if (Plan.hasEarlyExit())
-    return 1;
+    return std::max(1U, UserIC);
 
   const bool HasReductions =
       any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
@@ -4553,7 +4554,7 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
 
     // Loop body is free and there is no need for interleaving.
     if (LoopCost == 0)
-      return 1;
+      return std::max(1U, UserIC);
   }
 
   VPRegisterUsage R =
@@ -4690,7 +4691,8 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
   // benefit from interleaving.
   if (VF.isVector() && HasReductions) {
     LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");
-    return IC;
+    IntBeneficial = IC > 1;
+    return UserIC > 0 ? UserIC : IC;
   }
 
   // For any scalar loop that either requires runtime checks or predication we
@@ -4773,7 +4775,7 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
                });
     if (HasSelectCmpReductions) {
       LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
-      return 1;
+      return std::max(1U, UserIC);
     }
 
     // If we have a scalar reduction (vector reductions are already dealt with
@@ -4792,7 +4794,7 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
       if (HasOrderedReductions) {
         LLVM_DEBUG(
             dbgs() << "LV: Not interleaving scalar ordered reductions.\n");
-        return 1;
+        return std::max(1U, UserIC);
       }
 
       unsigned F = MaxNestedScalarReductionIC;
@@ -4805,7 +4807,9 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
         std::max(StoresIC, LoadsIC) > SmallIC) {
       LLVM_DEBUG(
           dbgs() << "LV: Interleaving to saturate store or load ports.\n");
-      return std::max(StoresIC, LoadsIC);
+      IC = std::max(StoresIC, LoadsIC);
+      IntBeneficial = IC > 1;
+      return UserIC > 0 ? UserIC : IC;
     }
 
     // If there are scalar reductions and TTI has enabled aggressive
@@ -4814,22 +4818,27 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
       LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
       // Interleave no less than SmallIC but not as aggressive as the normal IC
       // to satisfy the rare situation when resources are too limited.
-      return std::max(IC / 2, SmallIC);
+      IC = std::max(IC / 2, SmallIC);
+      IntBeneficial = IC > 1;
+      return UserIC > 0 ? UserIC : IC;
     }
 
     LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
-    return SmallIC;
+    IC = std::max(SmallIC, UserIC);
+    IntBeneficial = IC > 1;
+    return UserIC > 0 ? UserIC : IC;
   }
 
   // Interleave if this is a large loop (small loops are already dealt with by
   // this point) that could benefit from interleaving.
   if (AggressivelyInterleaveReductions) {
     LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
-    return IC;
+    IntBeneficial = IC > 1;
+    return UserIC > 0 ? UserIC : IC;
   }
 
   LLVM_DEBUG(dbgs() << "LV: Not Interleaving.\n");
-  return 1;
+  return std::max(1U, UserIC);
 }
 
 bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
@@ -9844,10 +9853,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   ElementCount UserVF = Hints.getWidth();
   unsigned UserIC = Hints.getInterleave();
 
-  unsigned SafeUserIC = CM.Legal->isSafeForAnyVectorWidth() ? UserIC : 0;
-
   // Plan how to best vectorize.
-  LVP.plan(UserVF, SafeUserIC);
+  LVP.plan(UserVF, UserIC);
   VectorizationFactor VF = LVP.computeBestVF();
   unsigned IC = 1;
 
@@ -9855,16 +9862,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     LVP.emitInvalidCostRemarks(ORE);
 
   GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
+  bool IntBeneficial = false;
   if (LVP.hasPlanWithVF(VF.Width)) {
     // Select the interleave count.
-    IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
-
-    unsigned SelectedIC = std::max(IC, SafeUserIC);
+    IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, UserIC,
+                                   VF.Cost, IntBeneficial);
 
     //  Optimistically generate runtime checks if they are needed. Drop them if
     //  they turn out to not be profitable.
-    if (VF.Width.isVector() || SelectedIC > 1) {
-      Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+    if (VF.Width.isVector() || IC > 1) {
+      Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, IC);
 
       // Bail out early if either the SCEV or memory runtime checks are known to
       // fail. In that case, the vector loop would never execute.
@@ -9910,13 +9917,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     VectorizeLoop = false;
   }
 
-  if (UserIC > 0 && UserIC != SafeUserIC) {
+  if (IC == 1 && UserIC > 1) {
     LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
     IntDiagMsg = {"InterleavingUnsafe",
                   "Ignoring user-specified interleave count due to possibly "
                   "unsafe dependencies in the loop."};
     InterleaveLoop = false;
-  } else if (!LVP.hasPlanWithVF(VF.Width) && SafeUserIC > 1) {
+  } else if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
     // Tell the user interleaving was avoided up-front, despite being explicitly
     // requested.
     LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -9924,7 +9931,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     IntDiagMsg = {"InterleavingAvoided",
                   "Ignoring UserIC, because interleaving was avoided up front"};
     InterleaveLoop = false;
-  } else if (IC == 1 && SafeUserIC <= 1) {
+  } else if (!IntBeneficial && UserIC <= 1) {
     // Tell the user interleaving is not beneficial.
     LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
     IntDiagMsg = {
@@ -9936,7 +9943,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
       IntDiagMsg.second +=
           " and is explicitly disabled or interleave count is set to 1";
     }
-  } else if (IC > 1 && SafeUserIC == 1) {
+  } else if (IntBeneficial && UserIC == 1) {
     // Tell the user interleaving is beneficial, but it explicitly disabled.
     LLVM_DEBUG(dbgs() << "LV: Interleaving is beneficial but is explicitly "
                          "disabled.\n");
@@ -9959,9 +9966,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     InterleaveLoop = false;
   }
 
-  // Override IC if user provided an interleave count.
-  IC = SafeUserIC > 0 ? SafeUserIC : IC;
-
   // Emit diagnostic messages, if any.
   const char *VAPassName = Hints.vectorizeAnalysisPassName();
   if (!VectorizeLoop && !InterleaveLoop) {

>From 17eda9450a6c5d61fcf93a696893d4f40edf1c1a Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 9 Oct 2025 10:20:29 +0000
Subject: [PATCH 4/6] - Moved UserIC back out of selectInterleaveCount

---
 .../Vectorize/LoopVectorizationPlanner.h      |  4 +-
 .../Transforms/Vectorize/LoopVectorize.cpp    | 67 +++++++++----------
 2 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ddf8b1054bf49..456fa4c858535 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -517,8 +517,8 @@ class LoopVectorizationPlanner {
   /// If interleave count has been specified by metadata it will be returned.
   /// Otherwise, the interleave count is computed and returned. VF and LoopCost
   /// are the selected vectorization factor and the cost of the selected VF.
-  unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, unsigned UserIC,
-                                 InstructionCost LoopCost, bool &IntBeneficial);
+  unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
+                                 InstructionCost LoopCost);
 
   /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
   /// according to the best selected \p VF and  \p UF.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3a9cbfca91fca..1bf6529fc4011 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4501,9 +4501,9 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
   }
 }
 
-unsigned LoopVectorizationPlanner::selectInterleaveCount(
-    VPlan &Plan, ElementCount VF, unsigned UserIC, InstructionCost LoopCost,
-    bool &IntBeneficial) {
+unsigned
+LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
+                                                InstructionCost LoopCost) {
   // -- The interleave heuristics --
   // We interleave the loop in order to expose ILP and reduce the loop overhead.
   // There are many micro-architectural considerations that we can't predict
@@ -4518,26 +4518,25 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
   // 3. We don't interleave if we think that we will spill registers to memory
   // due to the increased register pressure.
 
-  // We used the distance for the interleave count. This should not be overriden
-  // by a user-specified IC.
-  if (!Legal->isSafeForAnyVectorWidth())
-    return 1;
-
   if (!CM.isScalarEpilogueAllowed())
-    return std::max(1U, UserIC);
+    return 1;
 
   if (any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
              IsaPred<VPEVLBasedIVPHIRecipe>)) {
     LLVM_DEBUG(dbgs() << "LV: Preference for VP intrinsics indicated. "
                          "Unroll factor forced to be 1.\n");
-    return std::max(1U, UserIC);
+    return 1;
   }
 
+  // We used the distance for the interleave count.
+  if (!Legal->isSafeForAnyVectorWidth())
+    return 1;
+
   // We don't attempt to perform interleaving for loops with uncountable early
   // exits because the VPInstruction::AnyOf code cannot currently handle
   // multiple parts.
   if (Plan.hasEarlyExit())
-    return std::max(1U, UserIC);
+    return 1;
 
   const bool HasReductions =
       any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
@@ -4554,7 +4553,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
 
     // Loop body is free and there is no need for interleaving.
     if (LoopCost == 0)
-      return std::max(1U, UserIC);
+      return 1;
   }
 
   VPRegisterUsage R =
@@ -4691,8 +4690,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
   // benefit from interleaving.
   if (VF.isVector() && HasReductions) {
     LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");
-    IntBeneficial = IC > 1;
-    return UserIC > 0 ? UserIC : IC;
+    return IC;
   }
 
   // For any scalar loop that either requires runtime checks or predication we
@@ -4775,7 +4773,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
                });
     if (HasSelectCmpReductions) {
       LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
-      return std::max(1U, UserIC);
+      return 1;
     }
 
     // If we have a scalar reduction (vector reductions are already dealt with
@@ -4794,7 +4792,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
       if (HasOrderedReductions) {
         LLVM_DEBUG(
             dbgs() << "LV: Not interleaving scalar ordered reductions.\n");
-        return std::max(1U, UserIC);
+        return 1;
       }
 
       unsigned F = MaxNestedScalarReductionIC;
@@ -4807,9 +4805,7 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
         std::max(StoresIC, LoadsIC) > SmallIC) {
       LLVM_DEBUG(
           dbgs() << "LV: Interleaving to saturate store or load ports.\n");
-      IC = std::max(StoresIC, LoadsIC);
-      IntBeneficial = IC > 1;
-      return UserIC > 0 ? UserIC : IC;
+      return std::max(StoresIC, LoadsIC);
     }
 
     // If there are scalar reductions and TTI has enabled aggressive
@@ -4818,27 +4814,22 @@ unsigned LoopVectorizationPlanner::selectInterleaveCount(
       LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
       // Interleave no less than SmallIC but not as aggressive as the normal IC
       // to satisfy the rare situation when resources are too limited.
-      IC = std::max(IC / 2, SmallIC);
-      IntBeneficial = IC > 1;
-      return UserIC > 0 ? UserIC : IC;
+      return std::max(IC / 2, SmallIC);
     }
 
     LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
-    IC = std::max(SmallIC, UserIC);
-    IntBeneficial = IC > 1;
-    return UserIC > 0 ? UserIC : IC;
+    return SmallIC;
   }
 
   // Interleave if this is a large loop (small loops are already dealt with by
   // this point) that could benefit from interleaving.
   if (AggressivelyInterleaveReductions) {
     LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
-    IntBeneficial = IC > 1;
-    return UserIC > 0 ? UserIC : IC;
+    return IC;
   }
 
   LLVM_DEBUG(dbgs() << "LV: Not Interleaving.\n");
-  return std::max(1U, UserIC);
+  return 1;
 }
 
 bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
@@ -9851,7 +9842,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 
   // Get user vectorization factor and interleave count.
   ElementCount UserVF = Hints.getWidth();
-  unsigned UserIC = Hints.getInterleave();
+  unsigned UserIC = LVL.isSafeForAnyVectorWidth() ? Hints.getInterleave() : 1;
 
   // Plan how to best vectorize.
   LVP.plan(UserVF, UserIC);
@@ -9862,16 +9853,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     LVP.emitInvalidCostRemarks(ORE);
 
   GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
-  bool IntBeneficial = false;
   if (LVP.hasPlanWithVF(VF.Width)) {
     // Select the interleave count.
-    IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, UserIC,
-                                   VF.Cost, IntBeneficial);
+    IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
 
+    unsigned SelectedIC = std::max(IC, UserIC);
     //  Optimistically generate runtime checks if they are needed. Drop them if
     //  they turn out to not be profitable.
-    if (VF.Width.isVector() || IC > 1) {
-      Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, IC);
+    if (VF.Width.isVector() || SelectedIC > 1) {
+      Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
 
       // Bail out early if either the SCEV or memory runtime checks are known to
       // fail. In that case, the vector loop would never execute.
@@ -9917,7 +9907,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     VectorizeLoop = false;
   }
 
-  if (IC == 1 && UserIC > 1) {
+  if (UserIC == 1 && Hints.getInterleave() > 1) {
     LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
     IntDiagMsg = {"InterleavingUnsafe",
                   "Ignoring user-specified interleave count due to possibly "
@@ -9931,7 +9921,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     IntDiagMsg = {"InterleavingAvoided",
                   "Ignoring UserIC, because interleaving was avoided up front"};
     InterleaveLoop = false;
-  } else if (!IntBeneficial && UserIC <= 1) {
+  } else if (IC == 1 && UserIC <= 1) {
     // Tell the user interleaving is not beneficial.
     LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
     IntDiagMsg = {
@@ -9943,7 +9933,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
       IntDiagMsg.second +=
           " and is explicitly disabled or interleave count is set to 1";
     }
-  } else if (IntBeneficial && UserIC == 1) {
+  } else if (IC > 1 && UserIC == 1) {
     // Tell the user interleaving is beneficial, but it explicitly disabled.
     LLVM_DEBUG(dbgs() << "LV: Interleaving is beneficial but is explicitly "
                          "disabled.\n");
@@ -9966,6 +9956,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     InterleaveLoop = false;
   }
 
+  // Override IC if user provided an interleave count.
+  IC = UserIC > 0 ? UserIC : IC;
+
   // Emit diagnostic messages, if any.
   const char *VAPassName = Hints.vectorizeAnalysisPassName();
   if (!VectorizeLoop && !InterleaveLoop) {

>From 444e58cc075d5416fce5a6875fb32d9d403a8472 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 21 Oct 2025 12:44:44 +0000
Subject: [PATCH 5/6] - Only set UserIC to 1 if an interleave count > 1 was
 requested

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1bf6529fc4011..3cae917643b45 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9842,7 +9842,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 
   // Get user vectorization factor and interleave count.
   ElementCount UserVF = Hints.getWidth();
-  unsigned UserIC = LVL.isSafeForAnyVectorWidth() ? Hints.getInterleave() : 1;
+  unsigned UserIC = Hints.getInterleave();
+  if (UserIC > 1 && !LVL.isSafeForAnyVectorWidth())
+    UserIC = 1;
 
   // Plan how to best vectorize.
   LVP.plan(UserVF, UserIC);

>From 5ad8fb2ac1cde81889c27874f3000108ea25bf8d Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 21 Oct 2025 14:58:42 +0000
Subject: [PATCH 6/6] - Add an assert for !isSafeForAnyVectorWidth() - Cleanup
 test

---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  2 ++
 .../LoopVectorize/unsafe-ic-hint-remark.ll    | 19 +++++++++----------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3cae917643b45..4a185023fae29 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9910,6 +9910,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   }
 
   if (UserIC == 1 && Hints.getInterleave() > 1) {
+    assert(!LVL.isSafeForAnyVectorWidth() &&
+           "UserIC should only be ignored due to unsafe dependencies");
     LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
     IntDiagMsg = {"InterleavingUnsafe",
                   "Ignoring user-specified interleave count due to possibly "
diff --git a/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
index f2fb7a240bc9e..01934b1d7fbd2 100644
--- a/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
+++ b/llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
@@ -4,23 +4,22 @@
 
 ; CHECK: remark: <unknown>:0:0: Ignoring user-specified interleave count due to possibly unsafe dependencies in the loop.
 ; CHECK-LABEL: @loop_distance_4
-define void @loop_distance_4(i64 %N, ptr %a, ptr %b) {
+define void @loop_distance_4(ptr %a, ptr %b) {
 entry:
-  %cmp10 = icmp sgt i64 %N, 4
-  br i1 %cmp10, label %for.body, label %for.end
+  br label %loop
 
-for.body:
-  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
-  %0 = getelementptr i32, ptr %b, i64 %indvars.iv
+loop:
+  %iv = phi i64 [ 4, %entry ], [ %iv.next, %loop ]
+  %0 = getelementptr i32, ptr %b, i64 %iv
   %arrayidx = getelementptr i8, ptr %0, i64 -16
   %1 = load i32, ptr %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds nuw i32, ptr %a, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds nuw i32, ptr %a, i64 %iv
   %2 = load i32, ptr %arrayidx2, align 4
   %add = add nsw i32 %2, %1
   store i32 %add, ptr %0, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
-  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 64
+  br i1 %exitcond.not, label %for.end, label %loop, !llvm.loop !1
 
 for.end:
   ret void



More information about the llvm-commits mailing list