[llvm] 8140779 - [LV] Improve accuracy of branch weights in epilogue iteration check block (#152980)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 12 02:37:51 PDT 2025


Author: David Sherwood
Date: 2025-08-12T10:37:47+01:00
New Revision: 8140779a9a6fa3fac4820d5d2ba6121c612112c3

URL: https://github.com/llvm/llvm-project/commit/8140779a9a6fa3fac4820d5d2ba6121c612112c3
DIFF: https://github.com/llvm/llvm-project/commit/8140779a9a6fa3fac4820d5d2ba6121c612112c3.diff

LOG: [LV] Improve accuracy of branch weights in epilogue iteration check block (#152980)

When one of the vector loops (main or epilogue) is scalable and the
other isn't, we can use the estimated value of vscale to improve the
accuracy of the branch weights in the epilogue iteration check block.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 03fed224bc5b4..ea39ca4a7f9b4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7644,9 +7644,11 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
   BranchInst &BI =
       *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
   if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
-    unsigned MainLoopStep = EPI.MainLoopUF * EPI.MainLoopVF.getKnownMinValue();
+    auto VScale = Cost->getVScaleForTuning();
+    unsigned MainLoopStep =
+        estimateElementCount(EPI.MainLoopVF * EPI.MainLoopUF, VScale);
     unsigned EpilogueLoopStep =
-        EPI.EpilogueUF * EPI.EpilogueVF.getKnownMinValue();
+        estimateElementCount(EPI.EpilogueVF * EPI.EpilogueUF, VScale);
     // We assume the remaining `Count` is equally distributed in
     // [0, MainLoopStep)
     // So the probability for `Count < EpilogueLoopStep` should be

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll
index 9987701e44cde..7dcddbf630d1f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br" --filter "^.*:" --version 5
 ; RUN: opt -passes="print<block-freq>,loop-vectorize" -mcpu=neoverse-v1 -force-vector-interleave=1 -S < %s |  FileCheck %s -check-prefix=CHECK-V1-IC1
+; RUN: opt -passes="print<block-freq>,loop-vectorize" -mcpu=neoverse-v1 -force-vector-interleave=1 \
+; RUN:   -epilogue-vectorization-force-VF=4 -S < %s |  FileCheck %s -check-prefix=CHECK-V1-IC1-FORCE-EPI4
 ; RUN: opt -passes="print<block-freq>,loop-vectorize" -mcpu=neoverse-v2 -force-vector-interleave=1 -S < %s |  FileCheck %s -check-prefix=CHECK-V2-IC1
 ; RUN: opt -passes="print<block-freq>,loop-vectorize" -mcpu=neoverse-v2 -force-vector-interleave=4 -S < %s |  FileCheck %s -check-prefix=CHECK-V2-IC4
 
@@ -10,15 +12,15 @@ target triple = "aarch64-unknown-linux-gnu"
 
 ; We expect the branch weight computations after vectorisation to use
 ; vscale=2 for neoverse-v1 and vscale=1 for neoverse-v2.
-define void @_Z3foov(i64 %n) {
-; CHECK-V1-IC1-LABEL: define void @_Z3foov(
+define void @foo_i32(i64 %n) {
+; CHECK-V1-IC1-LABEL: define void @foo_i32(
 ; CHECK-V1-IC1-SAME: i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-V1-IC1:  [[ENTRY:.*:]]
 ; CHECK-V1-IC1:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0:![0-9]+]]
 ; CHECK-V1-IC1:  [[VECTOR_PH]]:
 ; CHECK-V1-IC1:    br label %[[VECTOR_BODY:.*]]
 ; CHECK-V1-IC1:  [[VECTOR_BODY]]:
-; CHECK-V1-IC1:    br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]]
+; CHECK-V1-IC1:    br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]]
 ; CHECK-V1-IC1:  [[MIDDLE_BLOCK]]:
 ; CHECK-V1-IC1:    br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF4:![0-9]+]]
 ; CHECK-V1-IC1:  [[SCALAR_PH]]:
@@ -27,7 +29,33 @@ define void @_Z3foov(i64 %n) {
 ; CHECK-V1-IC1:    br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK-V1-IC1:  [[FOR_COND_CLEANUP]]:
 ;
-; CHECK-V2-IC1-LABEL: define void @_Z3foov(
+; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i32(
+; CHECK-V1-IC1-FORCE-EPI4-SAME: i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-V1-IC1-FORCE-EPI4:  [[ITER_CHECK:.*:]]
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VECTOR_PH]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br label %[[VECTOR_BODY:.*]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VECTOR_BODY]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[MIDDLE_BLOCK]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF4:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_PH]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br label %[[FOR_BODY:.*]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[FOR_BODY]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[FOR_COND_CLEANUP]]:
+;
+; CHECK-V2-IC1-LABEL: define void @foo_i32(
 ; CHECK-V2-IC1-SAME: i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-V2-IC1:  [[ENTRY:.*:]]
 ; CHECK-V2-IC1:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0:![0-9]+]]
@@ -43,9 +71,9 @@ define void @_Z3foov(i64 %n) {
 ; CHECK-V2-IC1:    br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK-V2-IC1:  [[FOR_COND_CLEANUP]]:
 ;
-; CHECK-V2-IC4-LABEL: define void @_Z3foov(
+; CHECK-V2-IC4-LABEL: define void @foo_i32(
 ; CHECK-V2-IC4-SAME: i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-V2-IC4:  [[VEC_EPILOG_VECTOR_BODY1:.*:]]
+; CHECK-V2-IC4:  [[ITER_CHECK:.*:]]
 ; CHECK-V2-IC4:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0:![0-9]+]]
 ; CHECK-V2-IC4:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
 ; CHECK-V2-IC4:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
@@ -86,6 +114,128 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
+define void @foo_i8(i64 %n) {
+; CHECK-V1-IC1-LABEL: define void @foo_i8(
+; CHECK-V1-IC1-SAME: i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V1-IC1:  [[ITER_CHECK:.*:]]
+; CHECK-V1-IC1:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0]]
+; CHECK-V1-IC1:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-V1-IC1:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
+; CHECK-V1-IC1:  [[VECTOR_PH]]:
+; CHECK-V1-IC1:    br label %[[VECTOR_BODY:.*]]
+; CHECK-V1-IC1:  [[VECTOR_BODY]]:
+; CHECK-V1-IC1:    br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-V1-IC1:  [[MIDDLE_BLOCK]]:
+; CHECK-V1-IC1:    br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7]]
+; CHECK-V1-IC1:  [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-V1-IC1:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]]
+; CHECK-V1-IC1:  [[VEC_EPILOG_PH]]:
+; CHECK-V1-IC1:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK-V1-IC1:  [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-V1-IC1:    br i1 [[TMP15:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-V1-IC1:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-V1-IC1:    br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
+; CHECK-V1-IC1:  [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-V1-IC1:    br label %[[FOR_BODY:.*]]
+; CHECK-V1-IC1:  [[FOR_BODY]]:
+; CHECK-V1-IC1:    br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-V1-IC1:  [[FOR_COND_CLEANUP]]:
+;
+; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i8(
+; CHECK-V1-IC1-FORCE-EPI4-SAME: i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V1-IC1-FORCE-EPI4:  [[ITER_CHECK:.*:]]
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VECTOR_PH]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br label %[[VECTOR_BODY:.*]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VECTOR_BODY]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[MIDDLE_BLOCK]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF10]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_PH]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br label %[[FOR_BODY:.*]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[FOR_BODY]]:
+; CHECK-V1-IC1-FORCE-EPI4:    br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF8]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-V1-IC1-FORCE-EPI4:  [[FOR_COND_CLEANUP]]:
+;
+; CHECK-V2-IC1-LABEL: define void @foo_i8(
+; CHECK-V2-IC1-SAME: i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V2-IC1:  [[ITER_CHECK:.*:]]
+; CHECK-V2-IC1:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0]]
+; CHECK-V2-IC1:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-V2-IC1:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
+; CHECK-V2-IC1:  [[VECTOR_PH]]:
+; CHECK-V2-IC1:    br label %[[VECTOR_BODY:.*]]
+; CHECK-V2-IC1:  [[VECTOR_BODY]]:
+; CHECK-V2-IC1:    br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-V2-IC1:  [[MIDDLE_BLOCK]]:
+; CHECK-V2-IC1:    br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF10:![0-9]+]]
+; CHECK-V2-IC1:  [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-V2-IC1:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]]
+; CHECK-V2-IC1:  [[VEC_EPILOG_PH]]:
+; CHECK-V2-IC1:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK-V2-IC1:  [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-V2-IC1:    br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-V2-IC1:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-V2-IC1:    br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF5]]
+; CHECK-V2-IC1:  [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-V2-IC1:    br label %[[FOR_BODY:.*]]
+; CHECK-V2-IC1:  [[FOR_BODY]]:
+; CHECK-V2-IC1:    br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-V2-IC1:  [[FOR_COND_CLEANUP]]:
+;
+; CHECK-V2-IC4-LABEL: define void @foo_i8(
+; CHECK-V2-IC4-SAME: i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-V2-IC4:  [[ITER_CHECK:.*:]]
+; CHECK-V2-IC4:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0]]
+; CHECK-V2-IC4:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-V2-IC4:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
+; CHECK-V2-IC4:  [[VECTOR_PH]]:
+; CHECK-V2-IC4:    br label %[[VECTOR_BODY:.*]]
+; CHECK-V2-IC4:  [[VECTOR_BODY]]:
+; CHECK-V2-IC4:    br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF5]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-V2-IC4:  [[MIDDLE_BLOCK]]:
+; CHECK-V2-IC4:    br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF1]]
+; CHECK-V2-IC4:  [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-V2-IC4:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]]
+; CHECK-V2-IC4:  [[VEC_EPILOG_PH]]:
+; CHECK-V2-IC4:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK-V2-IC4:  [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-V2-IC4:    br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-V2-IC4:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-V2-IC4:    br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF14:![0-9]+]]
+; CHECK-V2-IC4:  [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-V2-IC4:    br label %[[FOR_BODY:.*]]
+; CHECK-V2-IC4:  [[FOR_BODY]]:
+; CHECK-V2-IC4:    br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-V2-IC4:  [[FOR_COND_CLEANUP]]:
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %iv
+  %load = load i8, ptr %arrayidx, align 1
+  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %iv
+  store i8 %load, ptr %arrayidx2, align 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !0
+
+for.cond.cleanup:
+  ret void
+}
+
 !0 = !{!"branch_weights", i32 1, i32 1023}
 ;.
 ; CHECK-V1-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
@@ -95,6 +245,28 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 7}
 ; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0}
 ; CHECK-V1-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META2]]}
+; CHECK-V1-IC1: [[PROF7]] = !{!"branch_weights", i32 1, i32 31}
+; CHECK-V1-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META3]]}
+; CHECK-V1-IC1: [[PROF9]] = !{!"branch_weights", i32 16, i32 16}
+; CHECK-V1-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META3]]}
+; CHECK-V1-IC1: [[PROF11]] = !{!"branch_weights", i32 1, i32 15}
+; CHECK-V1-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META2]]}
+;.
+; CHECK-V1-IC1-FORCE-EPI4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
+; CHECK-V1-IC1-FORCE-EPI4: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK-V1-IC1-FORCE-EPI4: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-V1-IC1-FORCE-EPI4: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-V1-IC1-FORCE-EPI4: [[PROF4]] = !{!"branch_weights", i32 1, i32 7}
+; CHECK-V1-IC1-FORCE-EPI4: [[PROF5]] = !{!"branch_weights", i32 4, i32 4}
+; CHECK-V1-IC1-FORCE-EPI4: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META3]]}
+; CHECK-V1-IC1-FORCE-EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3}
+; CHECK-V1-IC1-FORCE-EPI4: [[PROF8]] = !{!"branch_weights", i32 0, i32 0}
+; CHECK-V1-IC1-FORCE-EPI4: [[LOOP9]] = distinct !{[[LOOP9]], [[META3]], [[META2]]}
+; CHECK-V1-IC1-FORCE-EPI4: [[PROF10]] = !{!"branch_weights", i32 1, i32 31}
+; CHECK-V1-IC1-FORCE-EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META3]]}
+; CHECK-V1-IC1-FORCE-EPI4: [[PROF12]] = !{!"branch_weights", i32 4, i32 28}
+; CHECK-V1-IC1-FORCE-EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META3]]}
+; CHECK-V1-IC1-FORCE-EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META2]]}
 ;.
 ; CHECK-V2-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
 ; CHECK-V2-IC1: [[PROF1]] = !{!"branch_weights", i32 1, i32 255}
@@ -104,6 +276,12 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-V2-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 3}
 ; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
 ; CHECK-V2-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
+; CHECK-V2-IC1: [[PROF8]] = !{!"branch_weights", i32 1, i32 63}
+; CHECK-V2-IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META3]], [[META4]]}
+; CHECK-V2-IC1: [[PROF10]] = !{!"branch_weights", i32 1, i32 15}
+; CHECK-V2-IC1: [[PROF11]] = !{!"branch_weights", i32 4, i32 12}
+; CHECK-V2-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META4]]}
+; CHECK-V2-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META4]], [[META3]]}
 ;.
 ; CHECK-V2-IC4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
 ; CHECK-V2-IC4: [[PROF1]] = !{!"branch_weights", i32 1, i32 63}
@@ -116,4 +294,9 @@ for.cond.cleanup:                                 ; preds = %for.body
 ; CHECK-V2-IC4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3}
 ; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
 ; CHECK-V2-IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]}
+; CHECK-V2-IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META4]]}
+; CHECK-V2-IC4: [[PROF12]] = !{!"branch_weights", i32 8, i32 56}
+; CHECK-V2-IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META3]], [[META4]]}
+; CHECK-V2-IC4: [[PROF14]] = !{!"branch_weights", i32 1, i32 7}
+; CHECK-V2-IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]], [[META3]]}
 ;.


        


More information about the llvm-commits mailing list