[llvm] dc23869 - [LV] Handle vector trip count being zero in preparePlanForEpiVectorLoop.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 03:54:50 PDT 2025
Author: Florian Hahn
Date: 2025-08-20T11:54:22+01:00
New Revision: dc23869f98452ca2c4086f12bb431a8d6fdb8169
URL: https://github.com/llvm/llvm-project/commit/dc23869f98452ca2c4086f12bb431a8d6fdb8169
DIFF: https://github.com/llvm/llvm-project/commit/dc23869f98452ca2c4086f12bb431a8d6fdb8169.diff
LOG: [LV] Handle vector trip count being zero in preparePlanForEpiVectorLoop.
After a485e0e, we may not set the vector trip count in
preparePlanForEpilogueVectorLoop if it is zero. We should not choose a
VF * UF that makes the main vector loop dead (i.e. vector trip count is
zero), but there are some cases where this can happen currently.
In those cases, set EPI.VectorTripCount to zero.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 70f884016d08c..dd913f935bacf 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9796,6 +9796,19 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
"Must only have a single non-zero incoming value");
EPI.VectorTripCount = Inc;
}
+ // If we didn't find a non-zero vector trip count, all incoming values
+ // must be zero, which also means the vector trip count is zero. Pick the
+ // first zero as vector trip count.
+ // TODO: We should not choose VF * UF so the main vector loop is known to
+ // be dead.
+ if (!EPI.VectorTripCount) {
+ assert(
+ EPResumeVal->getNumIncomingValues() > 0 &&
+ all_of(EPResumeVal->incoming_values(),
+ [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
+ "all incoming values must be 0");
+ EPI.VectorTripCount = EPResumeVal->getOperand(0);
+ }
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
assert(all_of(IV->users(),
[](const VPUser *U) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
index 37b5f5dc16958..5d3f9acd51d68 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
@@ -333,3 +333,91 @@ for.body:
exit:
ret void
}
+
+; TODO: Choose smaller VF * UF for main loop, so we do not create a dead vector loop.
+define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
+; CHECK-LABEL: @small_trip_count_loop(
+; CHECK-NEXT: iter.check:
+; CHECK-NEXT: [[ARG3:%.*]] = ptrtoint ptr [[ARG:%.*]] to i64
+; CHECK-NEXT: [[ARG21:%.*]] = ptrtoint ptr [[ARG2:%.*]] to i64
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ARG21]], [[ARG3]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 32
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 48
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[ARG]], align 1
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 10)
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[WIDE_LOAD4]], splat (i8 10)
+; CHECK-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD5]], splat (i8 10)
+; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[WIDE_LOAD6]], splat (i8 10)
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 16
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 48
+; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[ARG2]], align 1
+; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP8]], align 1
+; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1
+; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP10]], align 1
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK: vec.epilog.iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK: vec.epilog.ph:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK: vec.epilog.vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = add <16 x i8> [[WIDE_LOAD7]], splat (i8 10)
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 [[INDEX]]
+; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
+; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: vec.epilog.middle.block:
+; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK: vec.epilog.scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[IV]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP_A]], align 1
+; CHECK-NEXT: [[SELECT:%.*]] = add i8 [[LOAD]], 10
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 [[IV]]
+; CHECK-NEXT: store i8 [[SELECT]], ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 20
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr inbounds i8, ptr %arg, i32 %iv
+ %load = load i8, ptr %gep.A, align 1
+ %select = add i8 %load, 10
+ %gep.B = getelementptr inbounds i8, ptr %arg2, i32 %iv
+ store i8 %select, ptr %gep.B, align 1
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv, 20
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list