[llvm] [LV]: Improve accuracy of calculating remaining iterations of MainLoopVF (PR #156723)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 23 00:52:20 PDT 2025
================
@@ -9,49 +9,57 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT-LABEL: define void @cost_store_i8(
; DEFAULT-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
; DEFAULT-NEXT: iter.check:
-; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP10]], 3
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP13]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; DEFAULT: vector.main.loop.iter.check:
-; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
-; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP3]]
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 101, [[TMP1]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
-; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32
-; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP5]]
+; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]]
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
; DEFAULT: vector.body:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; DEFAULT-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 4
-; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP9]], i64 [[TMP23]]
-; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP9]], align 1
-; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP24]], align 1
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
+; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP6]]
+; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP4]], align 1
+; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP7]], align 1
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; DEFAULT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; DEFAULT: middle.block:
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; DEFAULT: vec.epilog.iter.check:
-; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
+; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP13]]
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; DEFAULT: vec.epilog.ph:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; DEFAULT-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
+; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP12]]
+; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]]
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; DEFAULT: vec.epilog.vector.body:
-; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5]]
-; DEFAULT-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP19]], align 1
-; DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 8
-; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT6]], 96
-; DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]]
+; DEFAULT-NEXT: store <vscale x 8 x i8> zeroinitializer, ptr [[TMP9]], align 1
----------------
david-arm wrote:
Looks like this change is happening because of the unfair check in the loop vectoriser code:
```
if (RemainingIterations && !NextVF.Width.isScalable()) {
...
```
i.e. we're only seeing if the remaining iterations is greater than the next VF if it's fixed-width. However, we can apply the same check for scalable vectors by using an estimate of the next VF using vscale-for-tuning. But that's for a different PR!
https://github.com/llvm/llvm-project/pull/156723
More information about the llvm-commits
mailing list