[llvm] [SystemZ, LoopVectorizer] Enable vectorization of epilogue loops after VF16. (PR #172925)
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 20 08:26:15 PST 2026
https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/172925
>From dfd415fe83f9cb88989f79c4f99d9c50f2ef15a8 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 19 Dec 2025 00:07:29 +0100
Subject: [PATCH 1/5] LV: Enable vectorization of epilogue loops after VF16.
---
.../SystemZ/SystemZTargetTransformInfo.cpp | 10 +++++++
.../SystemZ/SystemZTargetTransformInfo.h | 2 ++
.../SystemZ/vectorized-epilogue-loop.ll | 28 +++++++++++++++++++
3 files changed, 40 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 2611c291abaa6..3b6f8ce08fd73 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -478,6 +478,16 @@ unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
}
+unsigned SystemZTTIImpl::getMaxInterleaveFactor(ElementCount VF) const {
+ // Enable vectorization of (LoopVectorizer) epilogue loop after VF 16 main
+ // loop. This can be highly beneficial when the original loop handles bytes
+ // (i8) and most of the time is not spent in the main vectorized loop
+ // (x264/mc_chroma).
+ if (VF == ElementCount::getFixed(16))
+ return 2;
+ return 1;
+}
+
bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) const {
EVT VT = TLI->getValueType(DL, DataType);
return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index fc681dec1859a..929405e946c55 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -82,6 +82,8 @@ class SystemZTTIImpl final : public BasicTTIImplBase<SystemZTTIImpl> {
bool HasCall) const override;
bool enableWritePrefetching() const override { return true; }
+ unsigned getMaxInterleaveFactor(ElementCount VF) const override;
+
bool hasDivRemOp(Type *DataType, bool IsSigned) const override;
bool prefersVectorizedAddressing() const override { return false; }
bool LSRWithInstrQueries() const override { return true; }
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
new file mode 100644
index 0000000000000..654d636045f76
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z16 -passes=loop-vectorize < %s \
+; RUN: | FileCheck %s
+;
+; Test that the loop vectorizer generates a vectorized epilogue loop after a VF16
+; vectorization.
+
+define void @fun(ptr %Src, ptr %Dst, i64 %wide.trip.count) {
+; CHECK-LABEL: @fun(
+; CHECK-LABEL: vector.body:
+; CHECK: %wide.load = load <16 x i8>
+; CHECK-LABEL: vec.epilog.vector.body:
+; CHECK: %wide.load8 = load <4 x i8>
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %arrayidx0 = getelementptr i8, ptr %Src, i64 %indvars.iv
+ %0 = load i8, ptr %arrayidx0
+ %arrayidx1 = getelementptr i8, ptr %Dst, i64 %indvars.iv
+ store i8 %0, ptr %arrayidx1
+ %exitcond.not = icmp eq i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
>From 6efc98ddf7c7aaff011c657c37b21e5ed34cf41f Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 19 Dec 2025 20:15:15 +0100
Subject: [PATCH 2/5] Refactor preferEpilogueVectorization to subsume the
interleave check by default. Return true for SystemZ. Leave SystemZ
getMaxInterleaveFactor() to return 1 always for now.
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 5 +++--
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 6 +++++-
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++--
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 2 +-
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 9 +++------
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h | 4 ++++
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9 ++-------
.../LoopVectorize/SystemZ/vectorized-epilogue-loop.ll | 2 +-
8 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 8b06b4aae26ce..24607cb5d9f20 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1874,8 +1874,9 @@ class TargetTransformInfo {
LLVM_ABI bool preferPredicatedReductionSelect() const;
/// Return true if the loop vectorizer should consider vectorizing an
- /// otherwise scalar epilogue loop.
- LLVM_ABI bool preferEpilogueVectorization() const;
+ /// otherwise scalar epilogue loop. VF is what was used for the vectorized
+ /// loop body.
+ LLVM_ABI bool preferEpilogueVectorization(ElementCount VF) const;
/// \returns True if the loop vectorizer should discard any VFs where the
/// maximum register pressure exceeds getNumberOfRegisters.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 74857a5b83aba..2a33ff0920fe1 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1087,7 +1087,11 @@ class TargetTransformInfoImplBase {
virtual bool preferPredicatedReductionSelect() const { return false; }
- virtual bool preferEpilogueVectorization() const { return true; }
+ virtual bool preferEpilogueVectorization(ElementCount VF) const {
+ // We consider epilogue vectorization unprofitable for targets that
+ // don't consider interleaving beneficial (e.g. MVE).
+ return getMaxInterleaveFactor(VF) > 1;
+ }
virtual bool shouldConsiderVectorizationRegPressure() const { return false; }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 2961d9361e5fa..def1cbf12b7b5 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1411,8 +1411,8 @@ bool TargetTransformInfo::preferPredicatedReductionSelect() const {
return TTIImpl->preferPredicatedReductionSelect();
}
-bool TargetTransformInfo::preferEpilogueVectorization() const {
- return TTIImpl->preferEpilogueVectorization();
+bool TargetTransformInfo::preferEpilogueVectorization(ElementCount VF) const {
+ return TTIImpl->preferEpilogueVectorization(VF);
}
bool TargetTransformInfo::shouldConsiderVectorizationRegPressure() const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6e38951520039..3374813333ab8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -138,7 +138,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
bool preferAlternateOpcodeVectorization() const override;
- bool preferEpilogueVectorization() const override {
+ bool preferEpilogueVectorization(ElementCount VF) const override {
// Epilogue vectorization is usually unprofitable - tail folding or
// a smaller VF would have been better. This a blunt hammer - we
// should re-examine this once vectorization is better tuned.
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 3b6f8ce08fd73..2851b8bbd5112 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -479,12 +479,9 @@ unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
}
unsigned SystemZTTIImpl::getMaxInterleaveFactor(ElementCount VF) const {
- // Enable vectorization of (LoopVectorizer) epilogue loop after VF 16 main
- // loop. This can be highly beneficial when the original loop handles bytes
- // (i8) and most of the time is not spent in the main vectorized loop
- // (x264/mc_chroma).
- if (VF == ElementCount::getFixed(16))
- return 2;
+ // TODO: Find optimal settings for interleave factors.
+ // if (VF == ElementCount::getFixed(16))
+ // return 2;
return 1;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 929405e946c55..25a7c59a4d856 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -149,6 +149,10 @@ class SystemZTTIImpl final : public BasicTTIImplBase<SystemZTTIImpl> {
getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const override;
+ bool preferEpilogueVectorization(ElementCount VF) const override {
+ return true;
+ }
+
bool shouldExpandReduction(const IntrinsicInst *II) const override;
/// @}
};
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cdc6ecfa21bcb..3c78c544cbe55 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4383,13 +4383,8 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
// account. For now we apply a very crude heuristic and only consider loops
// with vectorization factors larger than a certain value.
- // Allow the target to opt out entirely.
- if (!TTI.preferEpilogueVectorization())
- return false;
-
- // We also consider epilogue vectorization unprofitable for targets that don't
- // consider interleaving beneficial (eg. MVE).
- if (TTI.getMaxInterleaveFactor(VF) <= 1)
+ // Allow the target to opt out.
+ if (!TTI.preferEpilogueVectorization(VF))
return false;
unsigned MinVFThreshold = EpilogueVectorizationMinVF.getNumOccurrences() > 0
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
index 654d636045f76..14998d8f22a46 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
@@ -9,7 +9,7 @@ define void @fun(ptr %Src, ptr %Dst, i64 %wide.trip.count) {
; CHECK-LABEL: vector.body:
; CHECK: %wide.load = load <16 x i8>
; CHECK-LABEL: vec.epilog.vector.body:
-; CHECK: %wide.load8 = load <4 x i8>
+; CHECK: %wide.load7 = load <4 x i8>
entry:
br label %for.body
>From 47ee7f04844c0d226b6d14d98f2710b60ba47796 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Thu, 15 Jan 2026 22:07:13 +0100
Subject: [PATCH 3/5] Generate test checks for the vectorized-epilogue-loop.ll
test.
---
.../SystemZ/vectorized-epilogue-loop.ll | 80 +++++++++++++++++--
1 file changed, 75 insertions(+), 5 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
index 14998d8f22a46..4072871ac8dce 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/vectorized-epilogue-loop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z16 -passes=loop-vectorize < %s \
; RUN: | FileCheck %s
;
@@ -5,11 +6,72 @@
; vectorization.
define void @fun(ptr %Src, ptr %Dst, i64 %wide.trip.count) {
-; CHECK-LABEL: @fun(
-; CHECK-LABEL: vector.body:
-; CHECK: %wide.load = load <16 x i8>
-; CHECK-LABEL: vec.epilog.vector.body:
-; CHECK: %wide.load7 = load <4 x i8>
+; CHECK-LABEL: define void @fun(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[WIDE_TRIP_COUNT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ITER_CHECK:.*]]:
+; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64
+; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[WIDE_TRIP_COUNT]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[SRC2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[TMP0]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
+; CHECK: [[VEC_EPILOG_PH]]:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
+; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX6]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
+; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD7]], ptr [[TMP6]], align 1
+; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX6]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC5]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
+; CHECK-NEXT: br i1 [[CMP_N9]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX0]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i8 [[TMP8]], ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
entry:
br label %for.body
@@ -26,3 +88,11 @@ for.body:
exit:
ret void
}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 4, i32 12}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+;.
>From e371a2e2e927bd3fd2911b0ed71828b539539c11 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <Jonas.Paulsson2 at ibm.com>
Date: Tue, 20 Jan 2026 10:24:09 -0600
Subject: [PATCH 4/5] Commenting.
Co-authored-by: Florian Hahn <flo at fhahn.com>
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 24607cb5d9f20..cde8637f892b3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1874,8 +1874,7 @@ class TargetTransformInfo {
LLVM_ABI bool preferPredicatedReductionSelect() const;
/// Return true if the loop vectorizer should consider vectorizing an
- /// otherwise scalar epilogue loop. VF is what was used for the vectorized
- /// loop body.
+ /// otherwise scalar epilogue loop if the loop already has been vectorized with \p VF.
LLVM_ABI bool preferEpilogueVectorization(ElementCount VF) const;
/// \returns True if the loop vectorizer should discard any VFs where the
>From 5650f44c3e9e03eecd0fb47e789a7266abbe39af Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <Jonas.Paulsson2 at ibm.com>
Date: Tue, 20 Jan 2026 10:25:53 -0600
Subject: [PATCH 5/5] clang-format
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cde8637f892b3..d0ff44cdf69a7 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1874,7 +1874,8 @@ class TargetTransformInfo {
LLVM_ABI bool preferPredicatedReductionSelect() const;
/// Return true if the loop vectorizer should consider vectorizing an
- /// otherwise scalar epilogue loop if the loop already has been vectorized with \p VF.
+ /// otherwise scalar epilogue loop if the loop already has been vectorized
+ /// with \p VF.
LLVM_ABI bool preferEpilogueVectorization(ElementCount VF) const;
/// \returns True if the loop vectorizer should discard any VFs where the
More information about the llvm-commits
mailing list