[llvm] [LV] Extend FindFirstIV to unsigned case (PR #146386)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 6 11:36:48 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/146386
>From a2c35f98feda810a732395e47b65b40cf6ab59cc Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 30 Jun 2025 17:23:42 +0100
Subject: [PATCH 1/3] [LV] Extend FindFirstIV to unsigned case
Extend FindFirstIV vectorization to the unsigned case by introducing and
handling FindFirstIVUMin.
---
llvm/include/llvm/Analysis/IVDescriptors.h | 6 +-
llvm/lib/Analysis/IVDescriptors.cpp | 11 +-
.../Transforms/Vectorize/SLPVectorizer.cpp | 3 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 +-
.../LoopVectorize/iv-select-cmp-decreasing.ll | 238 +++++++++++++++---
5 files changed, 217 insertions(+), 51 deletions(-)
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 3b92cbff28de4..b985292ccee40 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -57,6 +57,9 @@ enum class RecurKind {
FindFirstIVSMin, /// FindFirst reduction with select(icmp(),x,y) where one of
///< (x,y) is a decreasing loop induction, and both x and y
///< are integer type, producing a SMin reduction.
+ FindFirstIVUMin, /// FindFirst reduction with select(icmp(),x,y) where one of
+ ///< (x,y) is a decreasing loop induction, and both x and y
+ ///< are integer type, producing a UMin reduction.
FindLastIVSMax, ///< FindLast reduction with select(cmp(),x,y) where one of
///< (x,y) is increasing loop induction, and both x and y
///< are integer type, producing a SMax reduction.
@@ -265,7 +268,8 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is of the form
/// select(cmp(),x,y) where one of (x,y) is decreasing loop induction.
static bool isFindFirstIVRecurrenceKind(RecurKind Kind) {
- return Kind == RecurKind::FindFirstIVSMin;
+ return Kind == RecurKind::FindFirstIVSMin ||
+ Kind == RecurKind::FindFirstIVUMin;
}
/// Returns true if the recurrence kind is of the form
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b275b1064cef2..ef9f5fc0d1afd 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -51,6 +51,7 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::UMin:
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
+ case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
return true;
@@ -741,10 +742,9 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
: APInt::getMinValue(NumBits);
ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
} else {
- assert(IsSigned && "Only FindFirstIV with SMax is supported currently");
- ValidRange =
- ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
- APInt::getSignedMaxValue(NumBits) - 1);
+ APInt Sentinel = IsSigned ? APInt::getSignedMaxValue(NumBits)
+ : APInt::getMaxValue(NumBits);
+ ValidRange = ConstantRange::getNonEmpty(Sentinel, Sentinel - 1);
}
LLVM_DEBUG(dbgs() << "LV: "
@@ -770,6 +770,8 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
if (CheckRange(true))
return RecurKind::FindFirstIVSMin;
+ if (CheckRange(false))
+ return RecurKind::FindFirstIVUMin;
return std::nullopt;
};
@@ -1183,6 +1185,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
return Instruction::Mul;
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
+ case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::Or:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c93af749507f8..1db363aac2181 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -23181,6 +23181,7 @@ class HorizontalReduction {
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
+ case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaximumNum:
@@ -23317,6 +23318,7 @@ class HorizontalReduction {
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
+ case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaximumNum:
@@ -23418,6 +23420,7 @@ class HorizontalReduction {
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
+ case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaximumNum:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 06511b61a67c3..e02a241000dab 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -764,14 +764,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *ReducedPartRdx = State.get(getOperand(3));
RecurKind MinMaxKind;
bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RK);
- if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
- } else {
- assert(RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK) &&
- "Kind must either be FindLastIV or FindFirstIV");
- assert(IsSigned && "Only FindFirstIV with SMax is currently supported");
- MinMaxKind = RecurKind::SMin;
- }
+ else
+ MinMaxKind = IsSigned ? RecurKind::SMin : RecurKind::UMin;
for (unsigned Part = 1; Part < UF; ++Part)
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
State.get(getOperand(3 + Part)));
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index d224da795997d..2186ebb067224 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -941,6 +941,205 @@ exit: ; preds = %loop
ret i16 %spec.select.lcssa
}
+; The signed sentinel value for decreasing-IV vectorization is LONG_MAX, and since
+; the IV hits this value with smin vectorization, it needs to be vectorized with a
+; an unsigned sentinel and umin instead.
+define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) {
+; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
+; IC1VF4-SAME: ptr [[A:%.*]]) {
+; IC1VF4-NEXT: [[ENTRY:.*]]:
+; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC1VF4: [[VECTOR_PH]]:
+; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC1VF4: [[VECTOR_BODY]]:
+; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 9223372036854775807, i64 9223372036854775806, i64 9223372036854775805, i64 9223372036854775804>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
+; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
+; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
+; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
+; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
+; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; IC1VF4: [[MIDDLE_BLOCK]]:
+; IC1VF4-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP4]])
+; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -1
+; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331
+; IC1VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC1VF4: [[SCALAR_PH]]:
+; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 9223372036854775807, %[[ENTRY]] ]
+; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC1VF4-NEXT: br label %[[LOOP:.*]]
+; IC1VF4: [[LOOP]]:
+; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; IC1VF4: [[EXIT]]:
+; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
+; IC4VF4-SAME: ptr [[A:%.*]]) {
+; IC4VF4-NEXT: [[ENTRY:.*]]:
+; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF4: [[VECTOR_PH]]:
+; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC4VF4: [[VECTOR_BODY]]:
+; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 9223372036854775807, i64 9223372036854775806, i64 9223372036854775805, i64 9223372036854775804>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4)
+; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
+; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
+; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
+; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
+; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4
+; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3
+; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8
+; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3
+; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12
+; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3
+; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; IC4VF4-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
+; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
+; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
+; IC4VF4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[REVERSE5]], splat (i64 3)
+; IC4VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE7]], splat (i64 3)
+; IC4VF4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[REVERSE9]], splat (i64 3)
+; IC4VF4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; IC4VF4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
+; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
+; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
+; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4)
+; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
+; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; IC4VF4: [[MIDDLE_BLOCK]]:
+; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]])
+; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]])
+; IC4VF4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX10]], <4 x i64> [[TMP16]])
+; IC4VF4-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX11]])
+; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], -1
+; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331
+; IC4VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF4: [[SCALAR_PH]]:
+; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 9223372036854775807, %[[ENTRY]] ]
+; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC4VF4-NEXT: br label %[[LOOP:.*]]
+; IC4VF4: [[LOOP]]:
+; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; IC4VF4: [[EXIT]]:
+; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
+; IC4VF1-SAME: ptr [[A:%.*]]) {
+; IC4VF1-NEXT: [[ENTRY:.*]]:
+; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF1: [[VECTOR_PH]]:
+; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC4VF1: [[VECTOR_BODY]]:
+; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
+; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
+; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
+; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; IC4VF1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP3]], align 8
+; IC4VF1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
+; IC4VF1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
+; IC4VF1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
+; IC4VF1-NEXT: [[TMP11:%.*]] = icmp sgt i64 [[TMP7]], 3
+; IC4VF1-NEXT: [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3
+; IC4VF1-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3
+; IC4VF1-NEXT: [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3
+; IC4VF1-NEXT: [[TMP15]] = select i1 [[TMP11]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
+; IC4VF1-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI1]]
+; IC4VF1-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI2]]
+; IC4VF1-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI3]]
+; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IC4VF1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
+; IC4VF1-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; IC4VF1: [[MIDDLE_BLOCK]]:
+; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP15]], i64 [[TMP16]])
+; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX]], i64 [[TMP17]])
+; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP18]])
+; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -1
+; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331
+; IC4VF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF1: [[SCALAR_PH]]:
+; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 9223372036854775807, %[[ENTRY]] ]
+; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC4VF1-NEXT: br label %[[LOOP:.*]]
+; IC4VF1: [[LOOP]]:
+; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; IC4VF1: [[EXIT]]:
+; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop: ; preds = %entry, %loop
+ %iv = phi i64 [ 9223372036854775807, %entry ], [ %iv.next, %loop ]
+ %rdx = phi i64 [ 331, %entry ], [ %spec.select, %loop ]
+ %gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv
+ %ld.a = load i64, ptr %gep.a.iv, align 8
+ %cmp.a.3 = icmp sgt i64 %ld.a, 3
+ %spec.select = select i1 %cmp.a.3, i64 %iv, i64 %rdx
+ %iv.next = add nsw i64 %iv, -1
+ %exit.cond = icmp eq i64 %iv, 0
+ br i1 %exit.cond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i64 %spec.select
+}
+
define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
@@ -981,42 +1180,3 @@ loop: ; preds = %entry, %loop
exit: ; preds = %loop
ret i64 %cond
}
-
-; The sentinel value for decreasing-IV vectorization is LONG_MAX, and since
-; the IV hits this value, it is impossible to vectorize this case.
-define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) {
-; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; CHECK-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
-;
-entry:
- br label %loop
-
-loop: ; preds = %entry, %loop
- %iv = phi i64 [ 9223372036854775807, %entry ], [ %iv.next, %loop ]
- %rdx = phi i64 [ 331, %entry ], [ %spec.select, %loop ]
- %gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv
- %ld.a = load i64, ptr %gep.a.iv, align 8
- %cmp.a.3 = icmp sgt i64 %ld.a, 3
- %spec.select = select i1 %cmp.a.3, i64 %iv, i64 %rdx
- %iv.next = add nsw i64 %iv, -1
- %exit.cond = icmp eq i64 %iv, 0
- br i1 %exit.cond, label %exit, label %loop
-
-exit: ; preds = %loop
- ret i64 %spec.select
-}
>From e3b6f0b96f69a4a505929019379447e613e5d993 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 3 Jul 2025 12:13:41 +0100
Subject: [PATCH 2/3] [LV] Add negative test
---
.../LoopVectorize/iv-select-cmp-decreasing.ll | 39 +++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index 2186ebb067224..4a0735af3170d 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -1140,6 +1140,45 @@ exit: ; preds = %loop
ret i64 %spec.select
}
+; The unsigned sentinel value for decreasing-IV vectorization is ULONG_MAX,
+; and since the IV hits this value, it is impossible to vectorize this case.
+define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) {
+; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; CHECK-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop: ; preds = %entry, %loop
+ %iv = phi i64 [ -1, %entry ], [ %iv.next, %loop ]
+ %rdx = phi i64 [ 331, %entry ], [ %spec.select, %loop ]
+ %gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv
+ %ld.a = load i64, ptr %gep.a.iv, align 8
+ %cmp.a.3 = icmp sgt i64 %ld.a, 3
+ %spec.select = select i1 %cmp.a.3, i64 %iv, i64 %rdx
+ %iv.next = add nsw i64 %iv, -1
+ %exit.cond = icmp eq i64 %iv, 0
+ br i1 %exit.cond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i64 %spec.select
+}
+
define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
>From 766a0a32e24bfe00396c6a39bf1b2a4620106f4d Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sun, 6 Jul 2025 19:35:25 +0100
Subject: [PATCH 3/3] [IVDesc] Clarify wrapped range
---
llvm/lib/Analysis/IVDescriptors.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index ef9f5fc0d1afd..37901f1e7fe41 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -720,16 +720,13 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
(isFindLastIVRecurrenceKind(Kind) && !SE.isKnownPositive(Step)))
return std::nullopt;
- // Keep the minimum value of the recurrence type as the sentinel value.
- // The maximum acceptable range for the increasing induction variable,
- // called the valid range, will be defined as
-
// Keep the minimum (FindLast) or maximum (FindFirst) value of the
// recurrence type as the sentinel value. The maximum acceptable range for
// the induction variable, called the valid range, will be defined as
// [<sentinel value> + 1, <sentinel value>)
// where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>) for
- // FindLastIV or [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV.
+ // FindLastIV or [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV,
+ // noting that this is a wrapped range since Start > End.
// TODO: This range restriction can be lifted by adding an additional
// virtual OR reduction.
auto CheckRange = [&](bool IsSigned) {
@@ -744,7 +741,7 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
} else {
APInt Sentinel = IsSigned ? APInt::getSignedMaxValue(NumBits)
: APInt::getMaxValue(NumBits);
- ValidRange = ConstantRange::getNonEmpty(Sentinel, Sentinel - 1);
+ ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
}
LLVM_DEBUG(dbgs() << "LV: "
More information about the llvm-commits
mailing list