[llvm] LV: refine loop-invariance checks (PR #127516)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 08:12:05 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Ramkumar Ramachandra (artagnon)
Changes:
An audit of LoopVectorize found that it uses Loop::isLoopInvariant in several places, skipping the more powerful SCEV isLoopInvariant check. LoopVectorizationLegality already has an isInvariant routine, which in turn calls into LoopAccessAnalysis. Fix a deficiency in LAA's routine, and use it more widely in place of Loop::isLoopInvariant, so that invariant values are identified correctly while vectorizing. LoopVectorizationCostModel additionally has the routine shouldConsiderInvariant, which is more powerful still but underused: fix this too.
-- 8< --
Needs #125365 to squash regressions.
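For illustration, here is a minimal sketch of how the two checks layer after this patch. The helper name is hypothetical; the body mirrors the LoopAccessInfo::isInvariant change in the diff below.

```cpp
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical helper mirroring the patched LoopAccessInfo::isInvariant.
static bool isInvariantSketch(Value *V, const Loop *TheLoop,
                              ScalarEvolution &SE) {
  // Structural check: Loop::isLoopInvariant only asks whether V is defined
  // outside the loop (non-instructions are trivially invariant). It also
  // covers values of non-SCEVable types, e.g. trivially loop-invariant FP
  // values that the SCEV-only check used to miss.
  if (TheLoop->isLoopInvariant(V))
    return true;
  // SCEV check: an instruction defined *inside* the loop can still compute
  // an invariant value, e.g. an add of two invariant operands.
  if (!SE.isSCEVable(V->getType()))
    return false;
  return SE.isLoopInvariant(SE.getSCEV(V), TheLoop);
}
```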
---
Patch is 26.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127516.diff
8 Files Affected:
- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+2-2)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+17-25)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll (+32-47)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll (+6-5)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll (+4-3)
- (modified) llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll (+7-6)
- (modified) llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll (+3-3)
``````````diff
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7d6dbd51a404d..4a733ff2395c5 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2807,8 +2807,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
bool LoopAccessInfo::isInvariant(Value *V) const {
auto *SE = PSE->getSE();
- // TODO: Is this really what we want? Even without FP SCEV, we may want some
- // trivially loop-invariant FP values to be considered invariant.
+ if (TheLoop->isLoopInvariant(V))
+ return true;
if (!SE->isSCEVable(V->getType()))
return false;
const SCEV *S = SE->getSCEV(V);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8c41f896ad622..12148e1cdd8f4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1567,7 +1567,7 @@ class LoopVectorizationCostModel {
/// Returns true if \p Op should be considered invariant and if it is
/// trivially hoistable.
- bool shouldConsiderInvariant(Value *Op);
+ bool shouldConsiderInvariant(Value *Op) const;
/// Return the value of vscale used for tuning the cost model.
std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
@@ -1763,8 +1763,7 @@ class LoopVectorizationCostModel {
/// extracted.
bool needsExtract(Value *V, ElementCount VF) const {
Instruction *I = dyn_cast<Instruction>(V);
- if (VF.isScalar() || !I || !TheLoop->contains(I) ||
- TheLoop->isLoopInvariant(I) ||
+ if (VF.isScalar() || !I || shouldConsiderInvariant(I) ||
getWideningDecision(I, VF) == CM_Scalarize)
return false;
@@ -3118,7 +3117,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// A helper that returns true if the given value is a getelementptr
// instruction contained in the loop.
auto IsLoopVaryingGEP = [&](Value *V) {
- return isa<GetElementPtrInst>(V) && !TheLoop->isLoopInvariant(V);
+ return isa<GetElementPtrInst>(V) && !shouldConsiderInvariant(V);
};
// A helper that evaluates a memory access's use of a pointer. If the use will
@@ -3346,14 +3345,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// is correct. The easiest form of the later is to require that all values
// stored are the same.
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
- TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
+ Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
// If the divisor is loop-invariant no predication is needed.
- return !TheLoop->isLoopInvariant(I->getOperand(1));
+ return !Legal->isInvariant(I->getOperand(1));
}
}
@@ -3410,7 +3409,7 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
Value *Op2 = I->getOperand(1);
auto Op2Info = TTI.getOperandInfo(Op2);
if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
- Legal->isInvariant(Op2))
+ shouldConsiderInvariant(Op2))
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());
@@ -3600,7 +3599,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// assuming aliasing and ordering which have already been checked.
return true;
// Storing the same value on every iteration.
- return TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand());
+ return Legal->isInvariant(cast<StoreInst>(I)->getValueOperand());
};
auto IsUniformDecision = [&](Instruction *I, ElementCount VF) {
@@ -5630,12 +5629,10 @@ static const SCEV *getAddressAccessSCEV(
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
- auto *SE = PSE.getSE();
unsigned NumOperands = Gep->getNumOperands();
for (unsigned Idx = 1; Idx < NumOperands; ++Idx) {
Value *Opd = Gep->getOperand(Idx);
- if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
- !Legal->isInductionVariable(Opd))
+ if (!Legal->isInvariant(Opd) && !Legal->isInductionVariable(Opd))
return nullptr;
}
@@ -5747,9 +5744,8 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy, {},
CostKind);
}
- StoreInst *SI = cast<StoreInst>(I);
- bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
+ bool IsLoopInvariantStoreValue = shouldConsiderInvariant(I);
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +
@@ -5900,7 +5896,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
match(Op0, m_ZExtOrSExt(m_Value())) &&
Op0->getOpcode() == Op1->getOpcode() &&
Op0->getOperand(0)->getType() == Op1->getOperand(0)->getType() &&
- !TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1) &&
+ !shouldConsiderInvariant(Op0) && !shouldConsiderInvariant(Op1) &&
(Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
// Matched reduce.add(ext(mul(ext(A), ext(B)))
@@ -5927,7 +5923,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
RedCost < ExtCost * 2 + MulCost + Ext2Cost + BaseCost)
return I == RetI ? RedCost : 0;
} else if (RedOp && match(RedOp, m_ZExtOrSExt(m_Value())) &&
- !TheLoop->isLoopInvariant(RedOp)) {
+ !shouldConsiderInvariant(RedOp)) {
// Matched reduce(ext(A))
bool IsUnsigned = isa<ZExtInst>(RedOp);
auto *ExtType = VectorType::get(RedOp->getOperand(0)->getType(), VectorTy);
@@ -5943,8 +5939,8 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
} else if (RedOp && RdxDesc.getOpcode() == Instruction::Add &&
match(RedOp, m_Mul(m_Instruction(Op0), m_Instruction(Op1)))) {
if (match(Op0, m_ZExtOrSExt(m_Value())) &&
- Op0->getOpcode() == Op1->getOpcode() &&
- !TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1)) {
+ Op0->getOpcode() == Op1->getOpcode() && !shouldConsiderInvariant(Op0) &&
+ !shouldConsiderInvariant(Op1)) {
bool IsUnsigned = isa<ZExtInst>(Op0);
Type *Op0Ty = Op0->getOperand(0)->getType();
Type *Op1Ty = Op1->getOperand(0)->getType();
@@ -6097,8 +6093,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
// A uniform store isn't neccessarily uniform-by-part
// and we can't assume scalarization.
- auto &SI = cast<StoreInst>(I);
- return TheLoop->isLoopInvariant(SI.getValueOperand());
+ return shouldConsiderInvariant(&I);
};
const InstructionCost GatherScatterCost =
@@ -6331,8 +6326,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
case VFParamKind::OMP_Uniform: {
Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
// Make sure the scalar parameter in the loop is invariant.
- if (!PSE.getSE()->isLoopInvariant(PSE.getSCEV(ScalarParam),
- TheLoop))
+ if (!Legal->isInvariant(ScalarParam))
ParamsOk = false;
break;
}
@@ -6405,7 +6399,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
}
}
-bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
+bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) const {
if (!Legal->isInvariant(Op))
return false;
// Consider Op invariant, if it or its operands aren't predicated
@@ -6441,7 +6435,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *RetTy = I->getType();
if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
- auto *SE = PSE.getSE();
auto HasSingleCopyAfterVectorization = [this](Instruction *I,
ElementCount VF) -> bool {
@@ -6687,8 +6680,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
- const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
- bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+ bool ScalarCond = shouldConsiderInvariant(SI->getCondition());
const Value *Op0, *Op1;
using namespace llvm::PatternMatch;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 305d26d7f3bc1..984e25693e379 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -7,60 +7,45 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], splat (i64 48)
-; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], splat (i64 52)
-; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP12]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 3, [[TMP2]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[A]], 48
+; CHECK-NEXT: [[TMP6:%.*]] = ashr i64 [[TMP5]], 52
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP4]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY1]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
-; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
-; CHECK-NEXT: store i8 [[TMP10]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[VECTOR_BODY]]
-; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
-; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
-; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.continue4:
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
-; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
-; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
-; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
-; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
-; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
-; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP0]], i32 3)
+; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[TMP14:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0
+; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP15]], i32 [[TMP8]], i32 [[TMP7]]
+; CHECK-NEXT: [[TMP17:%.*]] = shl i32 [[PREDPHI]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP17]] to i8
; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
-; CHECK: pred.store.continue8:
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index 68b36f23de4b0..1d95a95c3d8f8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -27,11 +27,12 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> [[VP_OP_LOAD]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> [[TMP7]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: [[VP_OP1:%.*]] = call <vscale x 1 x i16> @llvm.vp.lshr.nxv1i16(<vscale x 1 x i16> [[VP_OP]], <vscale x 1 x i16> trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>), <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> [[VP_OP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> [[TMP8]], <vscale x 1 x ptr> align 1 zeroinitializer, <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i8> [[VP_OP_LOAD]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = mul i32 0, [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
+; CHECK-NEXT: store i8 [[TMP14]], ptr null, align 1
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
index 7de51bc3a8a68..71f7446f30b5c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
@@ -45,15 +45,16 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <vscale x 8 x i32> [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP19]], 0
; CHECK-NEXT: [[VP_OP2:%.*]] = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP3:%.*]] = call <vscale x 8 x i32> @llvm.vp.or.nxv8i32(<vscale x 8 x i32> [[VP_OP2]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer, i32 [[TMP11]])
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> [[TMP17]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: call void ...
[truncated]
``````````
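As a concrete illustration of the UDiv/SDiv/SRem/URem hunk above, consider this hypothetical input loop (not from the PR's tests, and assuming LICM has not already hoisted the add): the divisor is an instruction inside the loop, so the structural check rejects it, but SCEV proves its value invariant, and the SCEV-backed Legal->isInvariant lets the divide go unpredicated.

```cpp
#include <cstddef>

// Hypothetical example, not from the PR. The divisor `d` is defined inside
// the loop, so Loop::isLoopInvariant returns false for it; SCEV still proves
// the computed value (a + b) loop-invariant, so with the SCEV-backed check
// the division needs no predication when the loop is vectorized.
void div_by_invariant(int *out, const int *in, int a, int b, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    int d = a + b;      // loop-varying per LoopInfo, invariant per SCEV
    out[i] = in[i] / d; // invariant divisor => no predication required
  }
}
```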
https://github.com/llvm/llvm-project/pull/127516
More information about the llvm-commits mailing list