[llvm] e36cd26 - [VPlan] Remove non-reductions after simplifications. (#176795)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 28 07:51:15 PST 2026
Author: Florian Hahn
Date: 2026-01-28T15:51:08Z
New Revision: e36cd26618c773bf2b4163afe03ccef7f81f12e4
URL: https://github.com/llvm/llvm-project/commit/e36cd26618c773bf2b4163afe03ccef7f81f12e4
DIFF: https://github.com/llvm/llvm-project/commit/e36cd26618c773bf2b4163afe03ccef7f81f12e4.diff
LOG: [VPlan] Remove non-reductions after simplifications. (#176795)
In some cases, we identify patterns as reductions, even though they can
be simplified to a non-reduction.
Mark VPReductionPHIRecipe as not reading from memory & not having
side-effects, to clean them up.
We also need to remove ComputeReductionResult VPInstructions with
live-in arguments. This means there is actually no reduction, and we
need to fold it to the live in. Otherwise we would incorrectly reduce
the live-in.
PR: https://github.com/llvm/llvm-project/pull/176795
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanUtils.h
llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index dd04828f0bd6e..6f605ac877e1b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -248,6 +248,9 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
/// Match a VPValue, capturing it if we match.
inline bind_ty<VPValue> m_VPValue(VPValue *&V) { return V; }
+/// Match a VPIRValue.
+inline bind_ty<VPIRValue> m_VPIRValue(VPIRValue *&V) { return V; }
+
/// Match a VPInstruction, capturing if we match.
inline bind_ty<VPInstruction> m_VPInstruction(VPInstruction *&V) { return V; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 942296145ccf0..62fb0e4d220ec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPBranchOnMaskSC:
case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
+ case VPReductionPHISC:
case VPPredInstPHISC:
case VPScalarIVStepsSC:
case VPWidenStoreEVLSC:
@@ -162,6 +163,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();
case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
+ case VPReductionPHISC:
case VPPredInstPHISC:
case VPVectorEndPointerSC:
return false;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6bff955aae9db..478e72e6d6cfc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1510,6 +1510,11 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
+ VPIRValue *IRV;
+ if (Def->getNumOperands() == 1 &&
+ match(Def, m_ComputeReductionResult(m_VPIRValue(IRV))))
+ return Def->replaceAllUsesWith(IRV);
+
// Some simplifications can only be applied after unrolling. Perform them
// below.
if (!Plan->isUnrolled())
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 204a9627e120e..31248cca50543 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -113,9 +113,9 @@ inline VPRecipeBase *findRecipe(VPValue *Start, PredT Pred) {
for (unsigned I = 0; I != Worklist.size(); ++I) {
VPValue *Cur = Worklist[I];
auto *R = Cur->getDefiningRecipe();
- // TODO: Skip live-ins once no degenerate reductions (ones with constant
- // backedge values) are generated.
- if (R && Pred(R))
+ if (!R)
+ continue;
+ if (Pred(R))
return R;
for (VPUser *U : Cur->users()) {
for (VPValue *V : cast<VPRecipeBase>(U)->definedValues())
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index c743359a1dddc..d8780c4836f4f 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -686,7 +686,6 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ splat (i1 true), %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[VEC_IND]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
@@ -706,39 +705,36 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) {
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ false, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i32 [[TMP1]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF2]]
; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[N_VEC3]] to i8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 1, i8 2, i8 3>
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ [[TMP9]], %[[VEC_EPILOG_PH]] ], [ splat (i1 true), %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND6:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[VEC_IND6]] to <4 x i64>
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: store <4 x i64> [[TMP10]], ptr [[TMP12]], align 8
-; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i32 [[INDEX4]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i8> [[VEC_IND6]], splat (i8 4)
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT7]], [[N_VEC3]]
-; CHECK-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[VEC_IND5]] to <4 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
+; CHECK-NEXT: store <4 x i64> [[TMP9]], ptr [[TMP11]], align 8
+; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX4]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <4 x i8> [[VEC_IND5]], splat (i8 4)
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6]], [[N_VEC3]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> splat (i1 true))
-; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC3]]
-; CHECK-NEXT: br i1 [[CMP_N9]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> splat (i1 true))
+; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC3]]
+; CHECK-NEXT: br i1 [[CMP_N8]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i1 [ [[TMP14]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ false, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i8 [ [[TMP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ true, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ false, %[[VEC_EPILOG_ITER_CHECK]] ], [ false, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i8 [ [[TMP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[OR_RED:%.*]] = phi i1 [ [[BC_MERGE_RDX10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[OR_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[OR_RED:%.*]] = phi i1 [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[OR_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL9]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_EXT]]
; CHECK-NEXT: store i64 [[IV_EXT]], ptr [[GEP]], align 8
@@ -747,7 +743,7 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i8 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[OR_NEXT_LCSSA:%.*]] = phi i1 [ [[OR_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[OR_NEXT_LCSSA:%.*]] = phi i1 [ [[OR_NEXT]], %[[LOOP]] ], [ true, %[[MIDDLE_BLOCK]] ], [ true, %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i1 [[OR_NEXT_LCSSA]]
;
entry:
More information about the llvm-commits
mailing list