[llvm] a668c0f - [LoopPredication] Fix division by zero in case of zero branch weights (#66506)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 18 18:38:33 PDT 2023


Author: Danila Malyutin
Date: 2023-09-19T04:38:29+03:00
New Revision: a668c0f687f9029c3171c757e1fa5df891eb7558

URL: https://github.com/llvm/llvm-project/commit/a668c0f687f9029c3171c757e1fa5df891eb7558
DIFF: https://github.com/llvm/llvm-project/commit/a668c0f687f9029c3171c757e1fa5df891eb7558.diff

LOG: [LoopPredication] Fix division by zero in case of zero branch weights (#66506)

Treat the case where all branch weights are zero as if there were no
profile data, instead of dividing by a zero total weight.
Fixes #66382

Added: 
    llvm/test/Transforms/LoopPredication/scale.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopPredication.cpp
    llvm/test/Transforms/LoopPredication/pr66382.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index a58ab093a1f75d3..55079b4a42d2fae 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -967,6 +967,9 @@ bool LoopPredication::isLoopProfitableToPredicate() {
           Numerator += Weight;
         Denominator += Weight;
       }
+      // If all weights are zero act as if there was no profile data
+      if (Denominator == 0)
+        return BranchProbability::getBranchProbability(1, NumSucc);
       return BranchProbability::getBranchProbability(Numerator, Denominator);
     } else {
       assert(LatchBlock != ExitingBlock &&

diff  --git a/llvm/test/Transforms/LoopPredication/pr66382.ll b/llvm/test/Transforms/LoopPredication/pr66382.ll
index 3ac4cac0615f464..f9a14d470453cf0 100644
--- a/llvm/test/Transforms/LoopPredication/pr66382.ll
+++ b/llvm/test/Transforms/LoopPredication/pr66382.ll
@@ -1,4 +1,4 @@
-; XFAIL: *
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' %s | FileCheck %s
 
 target triple = "x86_64-unknown-linux-gnu"
@@ -6,7 +6,26 @@ target triple = "x86_64-unknown-linux-gnu"
 ; Function Attrs: nocallback nofree nosync willreturn
 declare void @llvm.experimental.guard(i1, ...) #0
 
+; Check that LoopPredication doesn't crash on all-zero branch weights
 define void @foo() {
+; CHECK-LABEL: define void @foo() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       Header:
+; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[J_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false, i32 0) [ "deopt"() ]
+; CHECK-NEXT:    [[J_NEXT]] = add i64 [[J2]], 1
+; CHECK-NEXT:    br i1 false, label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       Latch:
+; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J2]], 0
+; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[COMMON_RET_LOOPEXIT:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK:       common.ret.loopexit:
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       exit:
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
 entry:
   br label %Header
 

diff  --git a/llvm/test/Transforms/LoopPredication/scale.ll b/llvm/test/Transforms/LoopPredication/scale.ll
new file mode 100644
index 000000000000000..29e48cf57959801
--- /dev/null
+++ b/llvm/test/Transforms/LoopPredication/scale.ll
@@ -0,0 +1,259 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' -verify-memoryssa -loop-predication-latch-probability-scale=2 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-PROF
+; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' -verify-memoryssa -loop-predication-latch-probability-scale=1.9 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOTPROF
+
+; LatchExitProbability: 0x20000000 / 0x80000000 = 25.00%
+; ExitingBlockProbability: 0x40000000 / 0x80000000 = 50.00%
+; Predicate is profitable when the scale factor is 2 and not profitable if it's less than 2.
+define i64 @predicate_eq_ones(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 {
+; CHECK-PROF-LABEL: define i64 @predicate_eq_ones(
+; CHECK-PROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-PROF-NEXT:  entry:
+; CHECK-PROF-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64
+; CHECK-PROF-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4
+; CHECK-PROF-NEXT:    [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
+; CHECK-PROF-NEXT:    [[TMP3:%.*]] = freeze i1 [[TMP2]]
+; CHECK-PROF-NEXT:    br label [[HEADER:%.*]]
+; CHECK-PROF:       Header:
+; CHECK-PROF-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ]
+; CHECK-PROF-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
+; CHECK-PROF-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
+; CHECK-PROF-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
+; CHECK-PROF-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
+; CHECK-PROF-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
+; CHECK-PROF-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-PROF:       Latch:
+; CHECK-PROF-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
+; CHECK-PROF-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK-PROF:       exitLatch:
+; CHECK-PROF-NEXT:    ret i64 1
+; CHECK-PROF:       exit:
+; CHECK-PROF-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
+; CHECK-PROF-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
+; CHECK-PROF-NEXT:    ret i64 [[RESULT_LE]]
+;
+; CHECK-NOTPROF-LABEL: define i64 @predicate_eq_ones(
+; CHECK-NOTPROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NOTPROF-NEXT:  entry:
+; CHECK-NOTPROF-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64
+; CHECK-NOTPROF-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4
+; CHECK-NOTPROF-NEXT:    br label [[HEADER:%.*]]
+; CHECK-NOTPROF:       Header:
+; CHECK-NOTPROF-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ]
+; CHECK-NOTPROF-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
+; CHECK-NOTPROF-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
+; CHECK-NOTPROF-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NOTPROF-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
+; CHECK-NOTPROF-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
+; CHECK-NOTPROF-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NOTPROF:       Latch:
+; CHECK-NOTPROF-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
+; CHECK-NOTPROF-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK-NOTPROF:       exitLatch:
+; CHECK-NOTPROF-NEXT:    ret i64 1
+; CHECK-NOTPROF:       exit:
+; CHECK-NOTPROF-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
+; CHECK-NOTPROF-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
+; CHECK-NOTPROF-NEXT:    ret i64 [[RESULT_LE]]
+;
+entry:
+  %length.ext = zext i32 %length to i64
+  %n.pre = load i64, ptr %n_addr, align 4
+  br label %Header
+
+Header:                                          ; preds = %entry, %Latch
+  %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ]
+  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
+  %within.bounds = icmp ult i64 %j2, %length.ext
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+  %innercmp = icmp eq i64 %j2, %n.pre
+  %j.next = add nuw nsw i64 %j2, 1
+  br i1 %innercmp, label %Latch, label %exit, !prof !0
+
+Latch:                                           ; preds = %Header
+  %speculate_trip_count = icmp ult i64 %j.next, 1048576
+  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2
+
+exitLatch:                                            ; preds = %Latch
+  ret i64 1
+
+exit:                                             ; preds = %Header
+  %result.in3.lcssa = phi ptr [ %result.in3, %Header ]
+  %result.le = load i64, ptr %result.in3.lcssa, align 8
+  ret i64 %result.le
+}
+!0 = !{!"branch_weights", i32 1, i32 1}
+
+; Same as the previous one, but with all-zero weights (should be treated as if there were no profile, i.e. equal probability for each successor)
+define i64 @predicate_eq_zeroes(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 {
+; CHECK-PROF-LABEL: define i64 @predicate_eq_zeroes(
+; CHECK-PROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0]] {
+; CHECK-PROF-NEXT:  entry:
+; CHECK-PROF-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64
+; CHECK-PROF-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4
+; CHECK-PROF-NEXT:    [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
+; CHECK-PROF-NEXT:    [[TMP3:%.*]] = freeze i1 [[TMP2]]
+; CHECK-PROF-NEXT:    br label [[HEADER:%.*]]
+; CHECK-PROF:       Header:
+; CHECK-PROF-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ]
+; CHECK-PROF-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
+; CHECK-PROF-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
+; CHECK-PROF-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
+; CHECK-PROF-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
+; CHECK-PROF-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
+; CHECK-PROF-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF3:![0-9]+]]
+; CHECK-PROF:       Latch:
+; CHECK-PROF-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
+; CHECK-PROF-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2]]
+; CHECK-PROF:       exitLatch:
+; CHECK-PROF-NEXT:    ret i64 1
+; CHECK-PROF:       exit:
+; CHECK-PROF-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
+; CHECK-PROF-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
+; CHECK-PROF-NEXT:    ret i64 [[RESULT_LE]]
+;
+; CHECK-NOTPROF-LABEL: define i64 @predicate_eq_zeroes(
+; CHECK-NOTPROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0]] {
+; CHECK-NOTPROF-NEXT:  entry:
+; CHECK-NOTPROF-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64
+; CHECK-NOTPROF-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4
+; CHECK-NOTPROF-NEXT:    br label [[HEADER:%.*]]
+; CHECK-NOTPROF:       Header:
+; CHECK-NOTPROF-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ]
+; CHECK-NOTPROF-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
+; CHECK-NOTPROF-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
+; CHECK-NOTPROF-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NOTPROF-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
+; CHECK-NOTPROF-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
+; CHECK-NOTPROF-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF3:![0-9]+]]
+; CHECK-NOTPROF:       Latch:
+; CHECK-NOTPROF-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
+; CHECK-NOTPROF-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2]]
+; CHECK-NOTPROF:       exitLatch:
+; CHECK-NOTPROF-NEXT:    ret i64 1
+; CHECK-NOTPROF:       exit:
+; CHECK-NOTPROF-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
+; CHECK-NOTPROF-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
+; CHECK-NOTPROF-NEXT:    ret i64 [[RESULT_LE]]
+;
+entry:
+  %length.ext = zext i32 %length to i64
+  %n.pre = load i64, ptr %n_addr, align 4
+  br label %Header
+
+Header:                                          ; preds = %entry, %Latch
+  %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ]
+  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
+  %within.bounds = icmp ult i64 %j2, %length.ext
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+  %innercmp = icmp eq i64 %j2, %n.pre
+  %j.next = add nuw nsw i64 %j2, 1
+  br i1 %innercmp, label %Latch, label %exit, !prof !1
+
+Latch:                                           ; preds = %Header
+  %speculate_trip_count = icmp ult i64 %j.next, 1048576
+  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2
+
+exitLatch:                                            ; preds = %Latch
+  ret i64 1
+
+exit:                                             ; preds = %Header
+  %result.in3.lcssa = phi ptr [ %result.in3, %Header ]
+  %result.le = load i64, ptr %result.in3.lcssa, align 8
+  ret i64 %result.le
+}
+!1 = !{!"branch_weights", i32 0, i32 0}
+
+; No profile on br in Header
+define i64 @predicate_eq_none(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 {
+; CHECK-PROF-LABEL: define i64 @predicate_eq_none(
+; CHECK-PROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0]] {
+; CHECK-PROF-NEXT:  entry:
+; CHECK-PROF-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64
+; CHECK-PROF-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4
+; CHECK-PROF-NEXT:    [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
+; CHECK-PROF-NEXT:    [[TMP3:%.*]] = freeze i1 [[TMP2]]
+; CHECK-PROF-NEXT:    br label [[HEADER:%.*]]
+; CHECK-PROF:       Header:
+; CHECK-PROF-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ]
+; CHECK-PROF-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
+; CHECK-PROF-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
+; CHECK-PROF-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
+; CHECK-PROF-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
+; CHECK-PROF-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
+; CHECK-PROF-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
+; CHECK-PROF-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK-PROF:       Latch:
+; CHECK-PROF-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
+; CHECK-PROF-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2]]
+; CHECK-PROF:       exitLatch:
+; CHECK-PROF-NEXT:    ret i64 1
+; CHECK-PROF:       exit:
+; CHECK-PROF-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
+; CHECK-PROF-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
+; CHECK-PROF-NEXT:    ret i64 [[RESULT_LE]]
+;
+; CHECK-NOTPROF-LABEL: define i64 @predicate_eq_none(
+; CHECK-NOTPROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0]] {
+; CHECK-NOTPROF-NEXT:  entry:
+; CHECK-NOTPROF-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64
+; CHECK-NOTPROF-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4
+; CHECK-NOTPROF-NEXT:    br label [[HEADER:%.*]]
+; CHECK-NOTPROF:       Header:
+; CHECK-NOTPROF-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ]
+; CHECK-NOTPROF-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
+; CHECK-NOTPROF-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
+; CHECK-NOTPROF-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NOTPROF-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
+; CHECK-NOTPROF-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
+; CHECK-NOTPROF-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK-NOTPROF:       Latch:
+; CHECK-NOTPROF-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
+; CHECK-NOTPROF-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2]]
+; CHECK-NOTPROF:       exitLatch:
+; CHECK-NOTPROF-NEXT:    ret i64 1
+; CHECK-NOTPROF:       exit:
+; CHECK-NOTPROF-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
+; CHECK-NOTPROF-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
+; CHECK-NOTPROF-NEXT:    ret i64 [[RESULT_LE]]
+;
+entry:
+  %length.ext = zext i32 %length to i64
+  %n.pre = load i64, ptr %n_addr, align 4
+  br label %Header
+
+Header:                                          ; preds = %entry, %Latch
+  %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ]
+  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
+  %within.bounds = icmp ult i64 %j2, %length.ext
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+  %innercmp = icmp eq i64 %j2, %n.pre
+  %j.next = add nuw nsw i64 %j2, 1
+  br i1 %innercmp, label %Latch, label %exit
+
+Latch:                                           ; preds = %Header
+  %speculate_trip_count = icmp ult i64 %j.next, 1048576
+  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2
+
+exitLatch:                                            ; preds = %Latch
+  ret i64 1
+
+exit:                                             ; preds = %Header
+  %result.in3.lcssa = phi ptr [ %result.in3, %Header ]
+  %result.le = load i64, ptr %result.in3.lcssa, align 8
+  ret i64 %result.le
+}
+
+!2 = !{!"branch_weights", i32 3, i32 1}
+!21 = !{!"function_entry_count", i64 20000}
+
+declare i64 @llvm.experimental.deoptimize.i64(...)
+declare void @llvm.experimental.guard(i1, ...)


        


More information about the llvm-commits mailing list