[llvm] [VPlan] Remove non-reductions after simplifications. (PR #176795)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 27 06:49:22 PST 2026
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/176795
>From f1798f2f57cdcfc3420657b6790c6c3bbc1f31de Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 19 Jan 2026 17:59:38 +0000
Subject: [PATCH 1/2] [VPlan] Remove non-reductions after simplifications.
In some cases, we identify patterns as reductions, even though they can
be simplified to a non-reduction.
Mark VPReductionPHIRecipe as not reading from memory and not having
side-effects, so that such reduction phis can be cleaned up.
We also need to remove ComputeReductionResult VPInstructions with
live-in arguments. This means there is actually no reduction, and we
need to fold it to the live-in. Otherwise we would incorrectly reduce
the live-in.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 4 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 ++
.../epilog-vectorization-reductions.ll | 40 +++++++++----------
4 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 251917f4b91c1..f21d370c1a965 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7287,9 +7287,7 @@ static VPRecipeBase *findRecipe(VPValue *Start, PredT Pred) {
for (unsigned I = 0; I != Worklist.size(); ++I) {
VPValue *Cur = Worklist[I];
auto *R = Cur->getDefiningRecipe();
- // TODO: Skip live-ins once no degenerate reductions (ones with constant
- // backedge values) are generated.
- if (R && Pred(R))
+ if (Pred(R))
return R;
for (VPUser *U : Cur->users()) {
for (VPValue *V : cast<VPRecipeBase>(U)->definedValues())
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5ef0c0d195eb2..16a5e6d9c43d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPBranchOnMaskSC:
case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
+ case VPReductionPHISC:
case VPPredInstPHISC:
case VPScalarIVStepsSC:
case VPWidenStoreEVLSC:
@@ -162,6 +163,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();
case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
+ case VPReductionPHISC:
case VPPredInstPHISC:
case VPVectorEndPointerSC:
return false;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a39b171ab4cd6..a258b00780cdb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1510,6 +1510,10 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
+ if (Def->getNumOperands() == 1 &&
+ match(Def, m_ComputeReductionResult(m_VPValue(A))) && isa<VPIRValue>(A))
+ return Def->replaceAllUsesWith(A);
+
// Some simplifications can only be applied after unrolling. Perform them
// below.
if (!Plan->isUnrolled())
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index c743359a1dddc..d8780c4836f4f 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -686,7 +686,6 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ splat (i1 true), %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[VEC_IND]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
@@ -706,39 +705,36 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) {
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ false, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i32 [[TMP1]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF2]]
; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[N_VEC3]] to i8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 1, i8 2, i8 3>
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ [[TMP9]], %[[VEC_EPILOG_PH]] ], [ splat (i1 true), %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND6:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[VEC_IND6]] to <4 x i64>
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: store <4 x i64> [[TMP10]], ptr [[TMP12]], align 8
-; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i32 [[INDEX4]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i8> [[VEC_IND6]], splat (i8 4)
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT7]], [[N_VEC3]]
-; CHECK-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[VEC_IND5]] to <4 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
+; CHECK-NEXT: store <4 x i64> [[TMP9]], ptr [[TMP11]], align 8
+; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX4]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <4 x i8> [[VEC_IND5]], splat (i8 4)
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6]], [[N_VEC3]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> splat (i1 true))
-; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC3]]
-; CHECK-NEXT: br i1 [[CMP_N9]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> splat (i1 true))
+; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC3]]
+; CHECK-NEXT: br i1 [[CMP_N8]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i1 [ [[TMP14]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ false, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i8 [ [[TMP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ true, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ false, %[[VEC_EPILOG_ITER_CHECK]] ], [ false, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i8 [ [[TMP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[OR_RED:%.*]] = phi i1 [ [[BC_MERGE_RDX10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[OR_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[OR_RED:%.*]] = phi i1 [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[OR_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL9]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_EXT]]
; CHECK-NEXT: store i64 [[IV_EXT]], ptr [[GEP]], align 8
@@ -747,7 +743,7 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i8 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[OR_NEXT_LCSSA:%.*]] = phi i1 [ [[OR_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[OR_NEXT_LCSSA:%.*]] = phi i1 [ [[OR_NEXT]], %[[LOOP]] ], [ true, %[[MIDDLE_BLOCK]] ], [ true, %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i1 [[OR_NEXT_LCSSA]]
;
entry:
>From b463a747db4c7e6d41d9f6ba954935a969beb6e4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 23 Jan 2026 21:24:29 +0000
Subject: [PATCH 2/2] !fixup use m_VPIRValue() matcher
---
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 3 +++
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 +++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 5d05d0dffc570..bd2e675d21493 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -246,6 +246,9 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
/// Match a VPValue, capturing it if we match.
inline bind_ty<VPValue> m_VPValue(VPValue *&V) { return V; }
+/// Match a VPIRValue, capturing it if we match.
+inline bind_ty<VPIRValue> m_VPIRValue(VPIRValue *&V) { return V; }
+
/// Match a VPInstruction, capturing if we match.
inline bind_ty<VPInstruction> m_VPInstruction(VPInstruction *&V) { return V; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6c6acb402b9da..ea64fd25bd466 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1510,9 +1510,10 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
+ VPIRValue *IRV;
if (Def->getNumOperands() == 1 &&
- match(Def, m_ComputeReductionResult(m_VPValue(A))) && isa<VPIRValue>(A))
- return Def->replaceAllUsesWith(A);
+ match(Def, m_ComputeReductionResult(m_VPIRValue(IRV))))
+ return Def->replaceAllUsesWith(IRV);
// Some simplifications can only be applied after unrolling. Perform them
// below.
More information about the llvm-commits
mailing list