[llvm] [LV] Extend FindLastIV to unsigned case (PR #141752)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 05:34:11 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-analysis
Author: Ramkumar Ramachandra (artagnon)
To avoid having two different RecurKinds, one for the signed case and another for the unsigned case, introduce RecurrenceDescriptor::isReduxSigned() to indicate whether the RecurKind is of the signed or unsigned variant. Demonstrate its use by extending FindLastIV to the unsigned case.
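For readers unfamiliar with the pattern, here is a minimal C++ sketch of the kind of source loop FindLastIV recognizes. It is illustrative only: the function name is made up, though the start value 331 and the `> 3` predicate mirror the test updated in this patch.

```cpp
// Hypothetical source loop for a FindLastIV reduction: the select keeps
// the last induction value whose element satisfied the compare, and the
// start value 331 if no element ever did.
int selectLastMatchingIndex(const int *A, int N) {
  int Rdx = 331;                // reduction start value
  for (int I = 0; I < N; ++I)   // monotonically increasing induction
    if (A[I] > 3)               // cmp()
      Rdx = I;                  // select(cmp(), iv, phi)
  return Rdx;
}
```

With this patch, the same pattern also vectorizes when the (possibly truncated) induction variable only fits an unsigned range, using 0 rather than the signed minimum as the sentinel.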
---
Patch is 31.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141752.diff
5 Files Affected:
- (modified) llvm/include/llvm/Analysis/IVDescriptors.h (+24-13)
- (modified) llvm/lib/Analysis/IVDescriptors.cpp (+35-21)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+4-3)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+146-56)
``````````diff
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index d94ffa7287db3..db2731869ed20 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -79,12 +79,13 @@ class RecurrenceDescriptor {
RecurrenceDescriptor(Value *Start, Instruction *Exit, StoreInst *Store,
RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
- Type *RT, bool Signed, bool Ordered,
- SmallPtrSetImpl<Instruction *> &CI,
+ Type *RT, bool IsResultSigned, bool IsReduxSigned,
+ bool Ordered, SmallPtrSetImpl<Instruction *> &CI,
unsigned MinWidthCastToRecurTy)
: IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
- IsSigned(Signed), IsOrdered(Ordered),
+ IsResultSigned(IsResultSigned), IsReduxSigned(IsReduxSigned),
+ IsOrdered(Ordered),
MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
CastInsts.insert_range(CI);
}
@@ -96,12 +97,14 @@ class RecurrenceDescriptor {
: IsRecurrence(IsRecur), PatternLastInst(I),
RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
- InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
- : IsRecurrence(true), PatternLastInst(I), RecKind(K),
- ExactFPMathInst(ExactFP) {}
+ InstDesc(Instruction *I, RecurKind K, bool IsSigned = false)
+ : IsRecurrence(true), IsSigned(IsSigned), PatternLastInst(I),
+ RecKind(K) {}
bool isRecurrence() const { return IsRecurrence; }
+ bool isSigned() const { return IsSigned; }
+
bool needsExactFPMath() const { return ExactFPMathInst != nullptr; }
Instruction *getExactFPMathInst() const { return ExactFPMathInst; }
@@ -113,13 +116,15 @@ class RecurrenceDescriptor {
private:
// Is this instruction a recurrence candidate.
bool IsRecurrence;
+ // Is this recurrence a signed variant.
+ bool IsSigned = false;
// The last instruction in a min/max pattern (select of the select(icmp())
// pattern), or the current recurrence instruction otherwise.
Instruction *PatternLastInst;
// If this is a min/max pattern.
RecurKind RecKind;
// Recurrence does not allow floating-point reassociation.
- Instruction *ExactFPMathInst;
+ Instruction *ExactFPMathInst = nullptr;
};
/// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -268,8 +273,9 @@ class RecurrenceDescriptor {
Value *getSentinelValue() const {
assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind");
Type *Ty = StartValue->getType();
- return ConstantInt::get(Ty,
- APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
+ unsigned BW = Ty->getIntegerBitWidth();
+ return ConstantInt::get(Ty, isReduxSigned() ? APInt::getSignedMinValue(BW)
+ : APInt::getMinValue(BW));
}
/// Returns a reference to the instructions used for type-promoting the
@@ -281,8 +287,11 @@ class RecurrenceDescriptor {
return MinWidthCastToRecurrenceType;
}
- /// Returns true if all source operands of the recurrence are SExtInsts.
- bool isSigned() const { return IsSigned; }
+ /// Returns true if the reduction result is signed.
+ bool isResultSigned() const { return IsResultSigned; }
+
+ /// Returns true if the reduction redux is signed.
+ bool isReduxSigned() const { return IsReduxSigned; }
/// Expose an ordered FP reduction to the instance users.
bool isOrdered() const { return IsOrdered; }
@@ -318,8 +327,10 @@ class RecurrenceDescriptor {
Instruction *ExactFPMathInst = nullptr;
// The type of the recurrence.
Type *RecurrenceType = nullptr;
- // True if all source operands of the recurrence are SExtInsts.
- bool IsSigned = false;
+ // True if reduction result is signed.
+ bool IsResultSigned = false;
+ // True if reduction redux is signed.
+ bool IsReduxSigned = false;
// True if this recurrence can be treated as an in-order reduction.
// Currently only a non-reassociative FAdd can be considered in-order,
// if it is also the only FAdd in the PHI's use chain.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b7c7bcab168cc..8d568236f0053 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -88,7 +88,7 @@ static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
}
/// Compute the minimal bit width needed to represent a reduction whose exit
-/// instruction is given by Exit.
+/// instruction is given by Exit, along with its signedness.
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
DemandedBits *DB,
AssumptionCache *AC,
@@ -255,7 +255,7 @@ bool RecurrenceDescriptor::AddReductionVar(
SmallPtrSet<Instruction *, 4> CastInsts;
unsigned MinWidthCastToRecurrenceType;
Instruction *Start = Phi;
- bool IsSigned = false;
+ bool IsResultSigned = false, IsReduxSigned = false;
SmallPtrSet<Instruction *, 8> VisitedInsts;
SmallVector<Instruction *, 8> Worklist;
@@ -396,6 +396,7 @@ bool RecurrenceDescriptor::AddReductionVar(
// state accurate while processing the worklist?
if (ReduxDesc.getRecKind() != RecurKind::None)
Kind = ReduxDesc.getRecKind();
+ IsReduxSigned = ReduxDesc.isSigned();
}
bool IsASelect = isa<SelectInst>(Cur);
@@ -565,7 +566,7 @@ bool RecurrenceDescriptor::AddReductionVar(
// smaller type. We should just generate a correctly typed expression
// to begin with.
Type *ComputedType;
- std::tie(ComputedType, IsSigned) =
+ std::tie(ComputedType, IsResultSigned) =
computeRecurrenceType(ExitInstruction, DB, AC, DT);
if (ComputedType != RecurrenceType)
return false;
@@ -595,8 +596,9 @@ bool RecurrenceDescriptor::AddReductionVar(
// Save the description of this reduction variable.
RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
- FMF, ExactFPMathInst, RecurrenceType, IsSigned,
- IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
+ FMF, ExactFPMathInst, RecurrenceType, IsResultSigned,
+ IsReduxSigned, IsOrdered, CastInsts,
+ MinWidthCastToRecurrenceType);
RedDes = RD;
return true;
@@ -700,47 +702,59 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
m_Value(NonRdxPhi)))))
return InstDesc(false, I);
- auto IsIncreasingLoopInduction = [&](Value *V) {
+ // Returns a non-nullopt boolean indicating the signedness of the recurrence
+ // when a valid FindLastIV pattern is found.
+ auto GetInductionSignedness = [&](Value *V) -> std::optional<bool> {
Type *Ty = V->getType();
if (!SE.isSCEVable(Ty))
- return false;
+ return std::nullopt;
auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V));
if (!AR || AR->getLoop() != TheLoop)
- return false;
+ return std::nullopt;
const SCEV *Step = AR->getStepRecurrence(SE);
if (!SE.isKnownPositive(Step))
- return false;
+ return std::nullopt;
- const ConstantRange IVRange = SE.getSignedRange(AR);
+ const ConstantRange SignedIVRange = SE.getSignedRange(AR);
+ const ConstantRange UnsignedIVRange = SE.getUnsignedRange(AR);
unsigned NumBits = Ty->getIntegerBitWidth();
// Keep the minimum value of the recurrence type as the sentinel value.
// The maximum acceptable range for the increasing induction variable,
// called the valid range, will be defined as
// [<sentinel value> + 1, <sentinel value>)
- // where <sentinel value> is SignedMin(<recurrence type>)
+ // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>)
// TODO: This range restriction can be lifted by adding an additional
// virtual OR reduction.
- const APInt Sentinel = APInt::getSignedMinValue(NumBits);
- const ConstantRange ValidRange =
- ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
- LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange
- << ", and the signed range of " << *AR << " is "
- << IVRange << "\n");
+ const APInt SignedSentinel = APInt::getSignedMinValue(NumBits);
+ const APInt UnsignedSentinel = APInt::getMinValue(NumBits);
+ const ConstantRange ValidSignedRange =
+ ConstantRange::getNonEmpty(SignedSentinel + 1, SignedSentinel);
+ const ConstantRange ValidUnsignedRange =
+ ConstantRange::getNonEmpty(UnsignedSentinel + 1, UnsignedSentinel);
+ LLVM_DEBUG(dbgs() << "LV: FindLastIV valid signed range is "
+ << ValidSignedRange << ", valid unsigned range is "
+ << ValidUnsignedRange << ", " << *AR
+ << " signed range is " << SignedIVRange
+ << ", and unsigned range is " << UnsignedIVRange << "\n");
// Ensure the induction variable does not wrap around by verifying that its
// range is fully contained within the valid range.
- return ValidRange.contains(IVRange);
+ if (ValidSignedRange.contains(SignedIVRange))
+ return true;
+ if (ValidUnsignedRange.contains(UnsignedIVRange))
+ return false;
+ return std::nullopt;
};
// We are looking for selects of the form:
// select(cmp(), phi, increasing_loop_induction) or
// select(cmp(), increasing_loop_induction, phi)
// TODO: Support for monotonically decreasing induction variable
- if (!IsIncreasingLoopInduction(NonRdxPhi))
- return InstDesc(false, I);
+ if (auto IsSigned = GetInductionSignedness(NonRdxPhi))
+ return InstDesc(I, RecurKind::FindLastIV, *IsSigned);
- return InstDesc(I, RecurKind::FindLastIV);
+ return InstDesc(false, I);
}
RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 05b5764ffcafc..e5c13f7ddfb68 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9535,7 +9535,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
auto *Trunc =
new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
auto *Extnd =
- RdxDesc.isSigned()
+ RdxDesc.isResultSigned()
? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
: new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 18229780bc4a5..9404423c2afcd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -621,8 +621,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
unsigned UF = getNumOperands() - 2;
Value *ReducedPartRdx = State.get(getOperand(2));
for (unsigned Part = 1; Part < UF; ++Part) {
- ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx,
- State.get(getOperand(2 + Part)));
+ ReducedPartRdx = createMinMaxOp(
+ Builder, RdxDesc.isReduxSigned() ? RecurKind::SMax : RecurKind::UMax,
+ ReducedPartRdx, State.get(getOperand(2 + Part)));
}
return createFindLastIVReduction(Builder, ReducedPartRdx,
@@ -696,7 +697,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
if (PhiTy != RdxDesc.getRecurrenceType())
- ReducedPartRdx = RdxDesc.isSigned()
+ ReducedPartRdx = RdxDesc.isResultSigned()
? Builder.CreateSExt(ReducedPartRdx, PhiTy)
: Builder.CreateZExt(ReducedPartRdx, PhiTy);
}
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
index e4597ebfe7dc8..a497ffcb9923a 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4
@@ -843,66 +843,184 @@ exit: ; preds = %for.body, %entry
; This test cannot be vectorized, even with a runtime check.
; The construct that are introduced by IndVarSimplify is:
; %1 = trunc i64 %iv to i32
-; However, the loop exit condition is a constant that overflows i32:
+; The loop exit condition is a constant that overflows signed i32,
+; but not unsigned i32:
; %exitcond.not = icmp eq i64 %inc, 4294967294
-; Hence, the i32 will most certainly wrap and hit the sentinel value, and we
-; cannot vectorize this case.
-define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) {
-; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
+; Hence, we can vectorize with the unsigned variant of FindLastIV.
+define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) {
+; CHECK-VF4IC1-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
+; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-VF4IC1: [[VECTOR_PH]]:
+; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-VF4IC1: [[VECTOR_BODY]]:
+; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 2147483646, i32 2147483647, i32 -2147483648, i32 -2147483647>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[IV:%.*]] = add i64 2147483646, [[INDEX]]
+; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 0
+; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-VF4IC1-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
+; CHECK-VF4IC1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
+; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]])
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP5]], i32 331
+; CHECK-VF4IC1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-VF4IC1: [[SCALAR_PH]]:
+; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967294, %[[MIDDLE_BLOCK]] ], [ 2147483646, %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
-; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
-; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4IC1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]]
+; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
-; CHECK-VF4IC1-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-VF4IC1-NEXT: [[CONV:%.*]] = trunc i64 [[IV1]] to i32
; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
-; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV1]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
-; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
-; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
;
-; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
+; CHECK-VF4IC4-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-VF4IC4: [[VECTOR_PH]]:
+; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-VF4IC4: [[VECTOR_BODY]]:
+; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 2147483646, i32 2147483647, i32 -2147483648, i32 -2147483647>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
+; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 2147483646, [[INDEX]]
+; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8
+; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelemen...
[truncated]
``````````
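To make the sentinel and valid-range logic in the diff above concrete, here is a minimal standalone sketch using llvm::APInt and llvm::ConstantRange. It mirrors, rather than reproduces, getSentinelValue and the range check in isFindLastIVPattern; the helper names are invented.

```cpp
// A minimal sketch of the sentinel / valid-range logic added by the patch
// (mirrors getSentinelValue and isFindLastIVPattern; not the actual code).
#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// The sentinel is the minimum value of the recurrence type in the chosen
// signedness; it marks "no index selected yet".
APInt findLastIVSentinel(unsigned BitWidth, bool IsReduxSigned) {
  return IsReduxSigned ? APInt::getSignedMinValue(BitWidth)
                       : APInt::getMinValue(BitWidth);
}

// The valid range wraps around: [Sentinel + 1, Sentinel), i.e. every value
// of the type except the sentinel itself. The IV's (signed or unsigned)
// range must be contained in it so that a real index can never collide
// with the sentinel.
bool ivRangeIsValid(const ConstantRange &IVRange, const APInt &Sentinel) {
  ConstantRange Valid = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
  return Valid.contains(IVRange);
}
```

For a 32-bit unsigned reduction the sentinel is 0 and the valid range is [1, 0), i.e. every nonzero i32, which is why the vectorized middle block in the test compares the reduced value against 0 (`icmp ne i32 ..., 0`) before falling back to the start value 331.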
https://github.com/llvm/llvm-project/pull/141752