[llvm] [LV][NFC] Add branch weight test showing incorrect behaviour (PR #144682)

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 19 01:56:27 PDT 2025


https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/144682

>From 9828546618682bdc500e95aabdca438e1c3e99e6 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 18 Jun 2025 10:18:59 +0000
Subject: [PATCH 1/2] [LV][NFC] Add branch weight test showing incorrect
 behaviour

This patch adds a test that shows incorrect branch weights being set
in the function
EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |   2 +
 .../LoopVectorize/branch-weights.ll           | 160 ++++++++++++------
 2 files changed, 107 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3b16248f962bc..e14f985efd96a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7683,6 +7683,8 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
   BranchInst &BI =
       *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
   if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
+    // FIXME: See test Transforms/LoopVectorize/branch-weights.ll. MainLoopStep
+    // does not look correct here.
     unsigned MainLoopStep = UF * VF.getKnownMinValue();
     unsigned EpilogueLoopStep =
         EPI.EpilogueUF * EPI.EpilogueVF.getKnownMinValue();
diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll
index e11f77d8aeaec..d162e7aff5f32 100644
--- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll
@@ -1,53 +1,81 @@
-; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4  -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br" --filter "^.*:" --version 5
+; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4  -enable-epilogue-vectorization \
+; RUN:   -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
+; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4  -enable-epilogue-vectorization \
+; RUN:   -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
 
-; CHECK-LABEL: @f0(
-;
-; CHECK: entry:
-; CHECK:   br i1 %cmp.entry, label %iter.check, label %exit, !prof [[PROF_F0_ENTRY:![0-9]+]]
-;
-; CHECK: iter.check:
-; CHECK:   br i1 %min.iters.check, label %vec.epilog.scalar.ph, label %vector.scevcheck, !prof [[PROF_F0_UNLIKELY:![0-9]+]]
-;
-; CHECK: vector.scevcheck:
-; CHECK:   br i1 %4, label %vec.epilog.scalar.ph, label %vector.main.loop.iter.check, !prof [[PROF_F0_UNLIKELY]]
-;
-; CHECK: vector.main.loop.iter.check:
-; CHECK:   br i1 %min.iters.check1, label %vec.epilog.ph, label %vector.ph, !prof [[PROF_F0_UNLIKELY]]
-;
-; CHECK: vector.ph:
-; CHECK:   br label %vector.body
-;
-; CHECK: vector.body:
-; CHECK:   br i1 {{.+}}, label %middle.block, label %vector.body, !prof [[PROF_F0_VECTOR_BODY:![0-9]+]]
-;
-; CHECK: middle.block:
-; CHECK:   br i1 %cmp.n, label %exit.loopexit, label %vec.epilog.iter.check, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]]
-;
-; CHECK: vec.epilog.iter.check:
-; CHECK:   br i1 %min.epilog.iters.check, label %vec.epilog.scalar.ph, label %vec.epilog.ph, !prof [[PROF_F0_VEC_EPILOGUE_SKIP:![0-9]+]]
-;
-; CHECK: vec.epilog.ph:
-; CHECK:   br label %vec.epilog.vector.body
-;
-; CHECK: vec.epilog.vector.body:
-; CHECK:   br i1 {{.+}}, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]]
-;
-; CHECK: vec.epilog.middle.block:
-; CHECK:   br i1 %cmp.n{{.+}}, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]]
-;
-; CHECK: vec.epilog.scalar.ph:
-; CHECK:   br label %loop
-;
-; CHECK: loop:
-; CHECK:   br i1 %cmp.loop, label %loop, label %exit.loopexit, !prof [[PROF_F0_LOOP:![0-9]+]]
+; FIXME: For MAINVF4IC2_EPI4 the branch weights in the terminator of
+; the VEC_EPILOG_ITER_CHECK block should be [4,4]: the main loop
+; processes 8 scalar iterations at a time, so the remaining count is
+; in the range [0,7] and the check (remaining < 4) skips the vector
+; epilogue for 4 of those 8 values. I believe the problem lies in
+; EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck
+; where the main loop VF is set to the same value as the epilogue VF.
+define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
+; MAINVF4IC1_EPI4-LABEL: define void @f0(
+; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
+; MAINVF4IC1_EPI4:  [[ENTRY:.*:]]
+; MAINVF4IC1_EPI4:    br i1 [[CMP_ENTRY:%.*]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
+; MAINVF4IC1_EPI4:  [[ITER_CHECK]]:
+; MAINVF4IC1_EPI4:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
+; MAINVF4IC1_EPI4:  [[VECTOR_SCEVCHECK]]:
+; MAINVF4IC1_EPI4:    br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
+; MAINVF4IC1_EPI4:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; MAINVF4IC1_EPI4:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
+; MAINVF4IC1_EPI4:  [[VECTOR_PH]]:
+; MAINVF4IC1_EPI4:    br label %[[VECTOR_BODY:.*]]
+; MAINVF4IC1_EPI4:  [[VECTOR_BODY]]:
+; MAINVF4IC1_EPI4:    br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
+; MAINVF4IC1_EPI4:  [[MIDDLE_BLOCK]]:
+; MAINVF4IC1_EPI4:    br i1 [[CMP_N:%.*]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
+; MAINVF4IC1_EPI4:  [[VEC_EPILOG_ITER_CHECK]]:
+; MAINVF4IC1_EPI4:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
+; MAINVF4IC1_EPI4:  [[VEC_EPILOG_PH]]:
+; MAINVF4IC1_EPI4:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; MAINVF4IC1_EPI4:  [[VEC_EPILOG_VECTOR_BODY]]:
+; MAINVF4IC1_EPI4:    br i1 [[TMP12:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
+; MAINVF4IC1_EPI4:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; MAINVF4IC1_EPI4:    br i1 [[CMP_N8:%.*]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
+; MAINVF4IC1_EPI4:  [[VEC_EPILOG_SCALAR_PH]]:
+; MAINVF4IC1_EPI4:    br label %[[LOOP:.*]]
+; MAINVF4IC1_EPI4:  [[LOOP]]:
+; MAINVF4IC1_EPI4:    br i1 [[CMP_LOOP:%.*]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]]
+; MAINVF4IC1_EPI4:  [[EXIT_LOOPEXIT]]:
+; MAINVF4IC1_EPI4:    br label %[[EXIT]]
+; MAINVF4IC1_EPI4:  [[EXIT]]:
 ;
-; CHECK: exit.loopexit:
-; CHECK:   br label %exit
+; MAINVF4IC2_EPI4-LABEL: define void @f0(
+; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
+; MAINVF4IC2_EPI4:  [[ENTRY:.*:]]
+; MAINVF4IC2_EPI4:    br i1 [[CMP_ENTRY:%.*]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
+; MAINVF4IC2_EPI4:  [[ITER_CHECK]]:
+; MAINVF4IC2_EPI4:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
+; MAINVF4IC2_EPI4:  [[VECTOR_SCEVCHECK]]:
+; MAINVF4IC2_EPI4:    br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
+; MAINVF4IC2_EPI4:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; MAINVF4IC2_EPI4:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
+; MAINVF4IC2_EPI4:  [[VECTOR_PH]]:
+; MAINVF4IC2_EPI4:    br label %[[VECTOR_BODY:.*]]
+; MAINVF4IC2_EPI4:  [[VECTOR_BODY]]:
+; MAINVF4IC2_EPI4:    br i1 [[TMP9:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
+; MAINVF4IC2_EPI4:  [[MIDDLE_BLOCK]]:
+; MAINVF4IC2_EPI4:    br i1 [[CMP_N:%.*]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
+; MAINVF4IC2_EPI4:  [[VEC_EPILOG_ITER_CHECK]]:
+; MAINVF4IC2_EPI4:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
+; MAINVF4IC2_EPI4:  [[VEC_EPILOG_PH]]:
+; MAINVF4IC2_EPI4:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; MAINVF4IC2_EPI4:  [[VEC_EPILOG_VECTOR_BODY]]:
+; MAINVF4IC2_EPI4:    br i1 [[TMP13:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
+; MAINVF4IC2_EPI4:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; MAINVF4IC2_EPI4:    br i1 [[CMP_N8:%.*]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
+; MAINVF4IC2_EPI4:  [[VEC_EPILOG_SCALAR_PH]]:
+; MAINVF4IC2_EPI4:    br label %[[LOOP:.*]]
+; MAINVF4IC2_EPI4:  [[LOOP]]:
+; MAINVF4IC2_EPI4:    br i1 [[CMP_LOOP:%.*]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]]
+; MAINVF4IC2_EPI4:  [[EXIT_LOOPEXIT]]:
+; MAINVF4IC2_EPI4:    br label %[[EXIT]]
+; MAINVF4IC2_EPI4:  [[EXIT]]:
 ;
-; CHECK: exit:
-; CHECK:   ret void
-
-define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
 entry:
   %cmp.entry = icmp sgt i32 %len, 0
   br i1 %cmp.entry, label %loop, label %exit, !prof !1
@@ -72,11 +100,33 @@ exit:
 !0 = !{!"function_entry_count", i64 13}
 !1 = !{!"branch_weights", i32 12, i32 1}
 !2 = !{!"branch_weights", i32 1234, i32 1}
-
-; CHECK: [[PROF_F0_ENTRY]] = !{!"branch_weights", i32 12, i32 1}
-; CHECK: [[PROF_F0_UNLIKELY]] = !{!"branch_weights", i32 1, i32 127}
-; CHECK: [[PROF_F0_VECTOR_BODY]] = !{!"branch_weights", i32 1, i32 307}
-; CHECK: [[PROF_F0_MIDDLE_BLOCKS]] =  !{!"branch_weights", i32 1, i32 3}
-; CHECK: [[PROF_F0_VEC_EPILOGUE_SKIP]] = !{!"branch_weights", i32 4, i32 0}
-; CHECK: [[PROF_F0_VEC_EPILOG_VECTOR_BODY]] = !{!"branch_weights", i32 0, i32 0}
-; CHECK: [[PROF_F0_LOOP]] = !{!"branch_weights", i32 2, i32 1}
+;.
+; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
+; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
+; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
+; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307}
+; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
+; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
+; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
+; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3}
+; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
+; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
+; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
+; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1}
+; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
+;.
+; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
+; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
+; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
+; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
+; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
+; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
+; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
+; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7}
+; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
+; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
+; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
+; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3}
+; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1}
+; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
+;.

>From d354a57e9bcc55b66ef6445c3db3e96a0f40d512 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 19 Jun 2025 08:55:07 +0000
Subject: [PATCH 2/2] Address review comment

---
 .../LoopVectorize/branch-weights.ll           | 60 +++++++++++++------
 1 file changed, 41 insertions(+), 19 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll
index d162e7aff5f32..6892709f085f7 100644
--- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br" --filter "^.*:" --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5
 ; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4  -enable-epilogue-vectorization \
 ; RUN:   -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
 ; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4  -enable-epilogue-vectorization \
@@ -15,31 +15,42 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
 ; MAINVF4IC1_EPI4-LABEL: define void @f0(
 ; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
 ; MAINVF4IC1_EPI4:  [[ENTRY:.*:]]
-; MAINVF4IC1_EPI4:    br i1 [[CMP_ENTRY:%.*]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
+; MAINVF4IC1_EPI4:    [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
+; MAINVF4IC1_EPI4:    br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
 ; MAINVF4IC1_EPI4:  [[ITER_CHECK]]:
-; MAINVF4IC1_EPI4:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
+; MAINVF4IC1_EPI4:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
+; MAINVF4IC1_EPI4:    br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
 ; MAINVF4IC1_EPI4:  [[VECTOR_SCEVCHECK]]:
+; MAINVF4IC1_EPI4:    [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
+; MAINVF4IC1_EPI4:    [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
 ; MAINVF4IC1_EPI4:    br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
 ; MAINVF4IC1_EPI4:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; MAINVF4IC1_EPI4:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
+; MAINVF4IC1_EPI4:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4
+; MAINVF4IC1_EPI4:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
 ; MAINVF4IC1_EPI4:  [[VECTOR_PH]]:
 ; MAINVF4IC1_EPI4:    br label %[[VECTOR_BODY:.*]]
 ; MAINVF4IC1_EPI4:  [[VECTOR_BODY]]:
-; MAINVF4IC1_EPI4:    br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
+; MAINVF4IC1_EPI4:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
+; MAINVF4IC1_EPI4:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
 ; MAINVF4IC1_EPI4:  [[MIDDLE_BLOCK]]:
-; MAINVF4IC1_EPI4:    br i1 [[CMP_N:%.*]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
+; MAINVF4IC1_EPI4:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
+; MAINVF4IC1_EPI4:    br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
 ; MAINVF4IC1_EPI4:  [[VEC_EPILOG_ITER_CHECK]]:
-; MAINVF4IC1_EPI4:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
+; MAINVF4IC1_EPI4:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
+; MAINVF4IC1_EPI4:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
 ; MAINVF4IC1_EPI4:  [[VEC_EPILOG_PH]]:
 ; MAINVF4IC1_EPI4:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; MAINVF4IC1_EPI4:  [[VEC_EPILOG_VECTOR_BODY]]:
-; MAINVF4IC1_EPI4:    br i1 [[TMP12:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
+; MAINVF4IC1_EPI4:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
+; MAINVF4IC1_EPI4:    br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
 ; MAINVF4IC1_EPI4:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
-; MAINVF4IC1_EPI4:    br i1 [[CMP_N8:%.*]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
+; MAINVF4IC1_EPI4:    [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
+; MAINVF4IC1_EPI4:    br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
 ; MAINVF4IC1_EPI4:  [[VEC_EPILOG_SCALAR_PH]]:
 ; MAINVF4IC1_EPI4:    br label %[[LOOP:.*]]
 ; MAINVF4IC1_EPI4:  [[LOOP]]:
-; MAINVF4IC1_EPI4:    br i1 [[CMP_LOOP:%.*]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]]
+; MAINVF4IC1_EPI4:    [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
+; MAINVF4IC1_EPI4:    br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]]
 ; MAINVF4IC1_EPI4:  [[EXIT_LOOPEXIT]]:
 ; MAINVF4IC1_EPI4:    br label %[[EXIT]]
 ; MAINVF4IC1_EPI4:  [[EXIT]]:
@@ -47,31 +58,42 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
 ; MAINVF4IC2_EPI4-LABEL: define void @f0(
 ; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
 ; MAINVF4IC2_EPI4:  [[ENTRY:.*:]]
-; MAINVF4IC2_EPI4:    br i1 [[CMP_ENTRY:%.*]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
+; MAINVF4IC2_EPI4:    [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
+; MAINVF4IC2_EPI4:    br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
 ; MAINVF4IC2_EPI4:  [[ITER_CHECK]]:
-; MAINVF4IC2_EPI4:    br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
+; MAINVF4IC2_EPI4:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
+; MAINVF4IC2_EPI4:    br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
 ; MAINVF4IC2_EPI4:  [[VECTOR_SCEVCHECK]]:
+; MAINVF4IC2_EPI4:    [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
+; MAINVF4IC2_EPI4:    [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
 ; MAINVF4IC2_EPI4:    br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
 ; MAINVF4IC2_EPI4:  [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; MAINVF4IC2_EPI4:    br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
+; MAINVF4IC2_EPI4:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
+; MAINVF4IC2_EPI4:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
 ; MAINVF4IC2_EPI4:  [[VECTOR_PH]]:
 ; MAINVF4IC2_EPI4:    br label %[[VECTOR_BODY:.*]]
 ; MAINVF4IC2_EPI4:  [[VECTOR_BODY]]:
-; MAINVF4IC2_EPI4:    br i1 [[TMP9:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
+; MAINVF4IC2_EPI4:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
+; MAINVF4IC2_EPI4:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
 ; MAINVF4IC2_EPI4:  [[MIDDLE_BLOCK]]:
-; MAINVF4IC2_EPI4:    br i1 [[CMP_N:%.*]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
+; MAINVF4IC2_EPI4:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
+; MAINVF4IC2_EPI4:    br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
 ; MAINVF4IC2_EPI4:  [[VEC_EPILOG_ITER_CHECK]]:
-; MAINVF4IC2_EPI4:    br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
+; MAINVF4IC2_EPI4:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
+; MAINVF4IC2_EPI4:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
 ; MAINVF4IC2_EPI4:  [[VEC_EPILOG_PH]]:
 ; MAINVF4IC2_EPI4:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; MAINVF4IC2_EPI4:  [[VEC_EPILOG_VECTOR_BODY]]:
-; MAINVF4IC2_EPI4:    br i1 [[TMP13:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
+; MAINVF4IC2_EPI4:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
+; MAINVF4IC2_EPI4:    br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
 ; MAINVF4IC2_EPI4:  [[VEC_EPILOG_MIDDLE_BLOCK]]:
-; MAINVF4IC2_EPI4:    br i1 [[CMP_N8:%.*]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
+; MAINVF4IC2_EPI4:    [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
+; MAINVF4IC2_EPI4:    br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
 ; MAINVF4IC2_EPI4:  [[VEC_EPILOG_SCALAR_PH]]:
 ; MAINVF4IC2_EPI4:    br label %[[LOOP:.*]]
 ; MAINVF4IC2_EPI4:  [[LOOP]]:
-; MAINVF4IC2_EPI4:    br i1 [[CMP_LOOP:%.*]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]]
+; MAINVF4IC2_EPI4:    [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
+; MAINVF4IC2_EPI4:    br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]]
 ; MAINVF4IC2_EPI4:  [[EXIT_LOOPEXIT]]:
 ; MAINVF4IC2_EPI4:    br label %[[EXIT]]
 ; MAINVF4IC2_EPI4:  [[EXIT]]:



More information about the llvm-commits mailing list