[llvm] [LV] Use SCEV::getElementCount in selectEpilogueVectorizationFactor. (PR #150018)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 22 06:38:11 PDT 2025
================
@@ -152,6 +152,107 @@ exit:
ret void
}
+define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
+; CHECK-LABEL: @main_vf_vscale_x_2_no_epi_iteration(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP9]]
+; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
+; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP10]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+;
+; CHECK-VF8-LABEL: @main_vf_vscale_x_2_no_epi_iteration(
+; CHECK-VF8-NEXT: iter.check:
+; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-VF8: vector.main.loop.iter.check:
+; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF8: vector.ph:
+; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-VF8: vector.body:
+; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
+; CHECK-VF8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
+; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP9]]
+; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
+; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP10]], align 1
+; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-VF8-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF8-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-VF8: middle.block:
+; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK-VF8: vec.epilog.iter.check:
+; CHECK-VF8-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
+; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
+; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-VF8: vec.epilog.ph:
+; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK-VF8: vec.epilog.vector.body:
----------------
david-arm wrote:
I guess it's a minor thing that it can be improved later, but when forcing an epilogue VF of 8 we should probably ignore the request because we know it will never execute.
https://github.com/llvm/llvm-project/pull/150018
More information about the llvm-commits
mailing list