[llvm-branch-commits] [llvm] [LoopUnroll] Fix freqs for unconditional latches: introduce tests (PR #191008)
Joel E. Denny via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Apr 8 09:30:14 PDT 2026
https://github.com/jdenny-ornl created https://github.com/llvm/llvm-project/pull/191008
This patch introduces all tests for PR #179520 but with current results so that it is easier to see which results PR #179520 improves. This patch should not land without PR #179520.
>From 1415b28d9801773fe8c40ed37fc5fa10a2455313 Mon Sep 17 00:00:00 2001
From: "Joel E. Denny" <jdenny.ornl at gmail.com>
Date: Wed, 8 Apr 2026 11:54:04 -0400
Subject: [PATCH] [LoopUnroll] Fix freqs for unconditional latches: introduce
tests
This patch introduces all tests for PR #179520 but with current
results so that it is easier to see which results PR #179520 improves.
This patch should not land without PR #179520.
---
.../branch-weights-freq/unroll-complete.ll | 530 ++++++++++++++++++
.../branch-weights-freq/unroll-epilog.ll | 270 +++++++--
.../unroll-partial-unconditional-latch.ll | 280 +++++++++
.../branch-weights-freq/unroll-partial.ll | 3 +-
.../LoopUnroll/loop-probability-one.ll | 201 ++++---
5 files changed, 1145 insertions(+), 139 deletions(-)
create mode 100644 llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-complete.ll
create mode 100644 llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial-unconditional-latch.ll
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-complete.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-complete.ll
new file mode 100644
index 0000000000000..fd7df00515e25
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-complete.ll
@@ -0,0 +1,530 @@
+; Test branch weight metadata, estimated trip count metadata, and block
+; frequencies after complete loop unrolling. The final unrolled iteration
+; unconditionally exits (backedge removed), and other unrolled iterations'
+; latches might unconditionally continue. Either contradicts the original
+; branch weights.
+;
+; (unroll-partial-unconditional-latch.ll tests partial unrolling cases,
+; including cases where the latch of any iteration, including the final, might
+; unconditionally continue.)
+;
+; For each case, we check:
+; - Iteration frequencies
+; - When each is multiplied by the number of original loop bodies that execute
+; within it, they should sum to almost exactly the original loop body
+; frequency.
+; - The only exception is an impossibly high or low original frequency (e.g.,
+; due to bad profile data), for which there exist no new branch weights that
+; can yield that frequency sum. In those cases, we expect the maximum or
+; minimum possible frequency.
+; - CFGs
+; - We verify which branch weights go with which branches and that we did not
+; overlook any other branch weights (no extra !prof or branch_weights).
+; - We also check the number of original loop bodies (represented by a call to
+; @f) that appear within each unrolled iteration.
+; - Branch weight metadata
+; - Checking frequencies already checks whether the branch weights have the
+; expected effect, but we also want to check that we get uniform
+; probabilities/weights (same !prof) across the unrolled iteration latches
+; when expected.
+; - llvm.loop.estimated_trip_count:
+; - There should be none because loops are completely unrolled.
+
+; ------------------------------------------------------------------------------
+; Define LIT substitutions.
+;
+; Before using the following lit substitutions, sed should be called to replace
+; these parameters in %s to produce %t.ll:
+; - @I_0@ is the starting value for the original loop's induction variable.
+; - @MIN@ and @MAX@ are the compile-time known minimum and maximum for the
+; number of original loop iterations, regardless of @I_0@.
+; - @W@ is the branch weight for the original loop's backedge. That value plus
+; 1 is the original loop body frequency because the exit branch weight is 1.
+;
+; For verifying that the test code produces the original loop body frequency we
+; expect.
+; DEFINE: %{bf-fc} = opt %t.ll -S -passes='print<block-freq>' 2>&1 | \
+; DEFINE: FileCheck %s -check-prefixes
+;
+; For checking the unrolled loop.
+; DEFINE: %{ur-bf} = opt %t.ll -S -passes='loop-unroll,print<block-freq>' 2>&1
+; DEFINE: %{fc} = FileCheck %s \
+; DEFINE: -implicit-check-not='llvm.loop.estimated_trip_count' \
+; DEFINE: -implicit-check-not='!prof' \
+; DEFINE: -implicit-check-not='branch_weights' \
+; DEFINE: -implicit-check-not='call void @f' -check-prefixes
+
+; ------------------------------------------------------------------------------
+; Check 1 max iteration:
+; - Unroll count of >=1 should always produce complete unrolling.
+; - That produces 0 unrolled iteration latches, so there are no branch weights
+; to compute.
+;
+; Original loop body frequency is 2 (loop weight 1), which is impossibly high.
+;
+; RUN: sed -e s/@MAX@/1/ -e s/@W@/1/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG1210
+; RUN: %{ur-bf} -unroll-count=1 | %{fc} UR1210
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR1210
+;
+; The new do.body is less than the old do.body, which is impossibly high.
+; ORIG1210: - do.body: float = 2.0,
+; UR1210: - do.body: float = 1.0,
+;
+; UR1210: call void @f
+;
+; Original loop body frequency is 1 (loop weight 0).
+;
+; RUN: sed -e s/@MAX@/1/ -e s/@W@/0/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG1110
+; RUN: %{ur-bf} -unroll-count=1 | %{fc} UR1110
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR1110
+;
+; The new do.body equals the old do.body.
+; ORIG1110: - do.body: float = 1.0,
+; UR1110: - do.body: float = 1.0,
+;
+; UR1110: call void @f
+
+; ------------------------------------------------------------------------------
+; Check 2 max iterations:
+; - Unroll count of >=2 should always produce complete unrolling.
+; - That produces <=1 unrolled iteration latch, so the implementation can
+; compute uniform weights by solving, at worst, a linear equation.
+;
+; Original loop body frequency is 3 (loop weight 2), which is impossibly high.
+;
+; First use a variable iteration count so that the sole non-final unrolled
+; iteration's latch remains conditional.
+;
+; RUN: sed -e s/@MAX@/2/ -e s/@W@/2/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG2310
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2310
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2310
+;
+; The sum of the new do.body* cannot reach the old do.body, which is
+; impossibly high.
+; ORIG2310: - do.body: float = 3.0,
+; UR2310: - do.body: float = 1.0,
+; FIXME: Should be 1.0:
+; UR2310: - do.body.1: float = 0.66667
+;
+; The sole probability is maximized to try to reach the original frequency.
+; UR2310: call void @f
+; UR2310: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR2310: call void @f
+; UR2310: br label %do.end
+; FIXME: Should be (0, non-zero):
+; UR2310: !0 = !{!"branch_weights", i32 1, i32 2}
+;
+; Now use a constant iteration count so that the sole non-final unrolled
+; iteration's latch unconditionally continues.
+;
+; RUN: sed -e s/@MAX@/2/ -e s/@W@/2/ -e s/@MIN@/2/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG2320
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2320
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2320
+;
+; The new do.body contains 2 of the original loop's iterations, so multiply
+; it by 2, which is less than the old do.body, which is impossibly high.
+; ORIG2320: - do.body: float = 3.0,
+; UR2320: - do.body: float = 1.0,
+;
+; UR2320: call void @f
+; UR2320-NOT: br
+; UR2320: call void @f
+; UR2320: ret void
+;
+; Original loop body frequency is 2 (loop weight 1).
+;
+; First use a variable iteration count so that the sole non-final unrolled
+; iteration's latch remains conditional.
+;
+; RUN: sed -e s/@MAX@/2/ -e s/@W@/1/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG2210
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2210
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2210
+;
+; The sum of the new do.body* is the old do.body.
+; ORIG2210: - do.body: float = 2.0,
+; UR2210: - do.body: float = 1.0,
+; FIXME: Should be 1.0:
+; UR2210: - do.body.1: float = 0.5,
+;
+; UR2210: call void @f
+; UR2210: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR2210: call void @f
+; UR2210: br label %do.end
+; FIXME: Should be (0, non-zero):
+; UR2210: !0 = !{!"branch_weights", i32 1, i32 1}
+;
+; Now use a constant iteration count so that the sole non-final unrolled
+; iteration's latch unconditionally continues.
+;
+; RUN: sed -e s/@MAX@/2/ -e s/@W@/1/ -e s/@MIN@/2/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG2220
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2220
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2220
+;
+; The new do.body contains 2 of the original loop's iterations, so multiply
+; it by 2 to get the old do.body.
+; ORIG2220: - do.body: float = 2.0,
+; UR2220: - do.body: float = 1.0,
+;
+; UR2220: call void @f
+; UR2220-NOT: br
+; UR2220: call void @f
+; UR2220: ret void
+;
+; Original loop body frequency is 1 (loop weight 0).
+;
+; First use a variable iteration count so that the sole non-final unrolled
+; iteration's latch remains conditional.
+;
+; RUN: sed -e s/@MAX@/2/ -e s/@W@/0/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG2110
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2110
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2110
+;
+; The sum of the new do.body* is approximately the old do.body.
+; ORIG2110: - do.body: float = 1.0,
+; UR2110: - do.body: float = 1.0,
+; UR2110: - do.body.1: float = 0.0{{(0000[0-9]*)?}},
+;
+; UR2110: call void @f
+; UR2110: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR2110: call void @f
+; UR2110: br label %do.end
+; UR2110: !0 = !{!"branch_weights", i32 1, i32 0}
+;
+; Now use a constant iteration count so that the sole non-final unrolled
+; iteration's latch unconditionally continues.
+;
+; RUN: sed -e s/@MAX@/2/ -e s/@W@/0/ -e s/@MIN@/2/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG2120
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2120
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2120
+;
+; The new do.body contains 2 of the original loop's iterations, so multiply
+; it by 2, which is greater than the old do.body, which is impossibly low.
+; ORIG2120: - do.body: float = 1.0,
+; UR2120: - do.body: float = 1.0,
+;
+; UR2120: call void @f
+; UR2120-NOT: br
+; UR2120: call void @f
+; UR2120: ret void
+
+; ------------------------------------------------------------------------------
+; Check 3 max iterations:
+; - Unroll count of >=3 should always produce complete unrolling.
+; - That produces <=2 unrolled iteration latches, so the implementation can
+; compute uniform weights by solving, at worst, a quadratic equation.
+;
+; Original loop body frequency is 4 (loop weight 3), which is impossibly high.
+;
+; First use a variable iteration count so that all non-final unrolled
+; iterations' latches remain conditional.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/3/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG3410
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3410
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3410
+;
+; The sum of the new do.body* cannot reach the old do.body, which is
+; impossibly high.
+; ORIG3410: - do.body: float = 4.0,
+; UR3410: - do.body: float = 1.0,
+; FIXME: Should be 1.0:
+; UR3410: - do.body.1: float = 0.75,
+; FIXME: Should be 1.0:
+; UR3410: - do.body.2: float = 0.5625,
+;
+; The probabilities are maximized to try to reach the original frequency.
+; UR3410: call void @f
+; UR3410: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR3410: call void @f
+; UR3410: br i1 %{{.*}}, label %do.end, label %do.body.2, !prof !0
+; UR3410: call void @f
+; UR3410: br label %do.end
+; FIXME: Should be (0, non-zero):
+; UR3410: !0 = !{!"branch_weights", i32 1, i32 3}
+;
+; Now use a constant iteration count so that all non-final unrolled
+; iterations' latches unconditionally continue.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/3/ -e s/@MIN@/3/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG3430
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3430
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3430
+;
+; The new do.body contains 3 of the original loop's iterations, so multiply
+; it by 3, which is less than the old do.body, which is impossibly high.
+; ORIG3430: - do.body: float = 4.0,
+; UR3430: - do.body: float = 1.0,
+;
+; UR3430: call void @f
+; UR3430-NOT: br
+; UR3430: call void @f
+; UR3430-NOT: br
+; UR3430: call void @f
+; UR3430: ret void
+;
+; Use a constant iteration count but now the loop upper bound computation can
+; overflow. When it does, the loop induction variable is greater than it
+; immediately, so the initial unrolled iteration's latch remains conditional.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/3/ -e s/@MIN@/3/ -e s/@I_0@/%x/ %s > %t.ll
+; RUN: %{bf-fc} ORIG343x
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR343x
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR343x
+;
+; The new do.body.1 contains 2 of the original loop's iterations, so
+; multiply it by 2, and add the new do.body, but that sum is less than the
+; old do.body, which is impossibly high.
+; ORIG343x: - do.body: float = 4.0,
+; UR343x: - do.body: float = 1.0,
+; FIXME: Should be 1.0:
+; UR343x: - do.body.1: float = 0.75,
+;
+; The sole probability is maximized to try to reach the original frequency.
+; UR343x: call void @f
+; UR343x: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR343x: call void @f
+; UR343x-NOT: br
+; UR343x: call void @f
+; UR343x: ret void
+; FIXME: Should be (0, non-zero):
+; UR343x: !0 = !{!"branch_weights", i32 1, i32 3}
+;
+; Original loop body frequency is 3 (loop weight 2).
+;
+; First use a variable iteration count so that all non-final unrolled
+; iterations' latches remain conditional.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/2/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG3310
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3310
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3310
+;
+; The sum of the new do.body* is the old do.body.
+; ORIG3310: - do.body: float = 3.0,
+; UR3310: - do.body: float = 1.0,
+; FIXME: Should be 1.0:
+; UR3310: - do.body.1: float = 0.66667,
+; FIXME: Should be 1.0:
+; UR3310: - do.body.2: float = 0.44444,
+;
+; UR3310: call void @f
+; UR3310: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR3310: call void @f
+; UR3310: br i1 %{{.*}}, label %do.end, label %do.body.2, !prof !0
+; UR3310: call void @f
+; UR3310: br label %do.end
+; FIXME: Should be (very small, very large):
+; UR3310: !0 = !{!"branch_weights", i32 1, i32 2}
+;
+; Now use a constant iteration count so that all non-final unrolled
+; iterations' latches unconditionally continue.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/2/ -e s/@MIN@/3/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG3330
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3330
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3330
+;
+; The new do.body contains 3 of the original loop's iterations, so multiply
+; it by 3 to get the old do.body.
+; ORIG3330: - do.body: float = 3.0,
+; UR3330: - do.body: float = 1.0,
+;
+; UR3330: call void @f
+; UR3330-NOT: br
+; UR3330: call void @f
+; UR3330-NOT: br
+; UR3330: call void @f
+; UR3330: ret void
+;
+; Use a constant iteration count but now the loop upper bound computation can
+; overflow. When it does, the loop induction variable is greater than it
+; immediately, so the initial unrolled iteration's latch remains conditional.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/2/ -e s/@MIN@/3/ -e s/@I_0@/%x/ %s > %t.ll
+; RUN: %{bf-fc} ORIG333x
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR333x
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR333x
+;
+; The new do.body.1 contains 2 of the original loop's iterations, so
+; multiply it by 2, and add the new do.body to get the old do.body.
+; ORIG333x: - do.body: float = 3.0,
+; UR333x: - do.body: float = 1.0,
+; FIXME: Should be 1.0:
+; UR333x: - do.body.1: float = 0.66667,
+;
+; UR333x: call void @f
+; UR333x: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR333x: call void @f
+; UR333x-NOT: br
+; UR333x: call void @f
+; UR333x: br label %do.end
+; FIXME: Should be (very small, very large):
+; UR333x: !0 = !{!"branch_weights", i32 1, i32 2}
+;
+; Original loop body frequency is 2 (loop weight 1). This is our first case
+; where new frequencies and probabilities are not all approximately 1 or 0.
+;
+; First use a variable iteration count so that all non-final unrolled
+; iterations' latches remain conditional.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/1/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG3210
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3210
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3210
+;
+; The sum of the new do.body* is the old do.body.
+; ORIG3210: - do.body: float = 2.0,
+; UR3210: - do.body: float = 1.0,
+; FIXME: Should sum to 1.0:
+; UR3210: - do.body.1: float = 0.5,
+; UR3210: - do.body.2: float = 0.25,
+;
+; UR3210: call void @f
+; UR3210: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR3210: call void @f
+; UR3210: br i1 %{{.*}}, label %do.end, label %do.body.2, !prof !0
+; UR3210: call void @f
+; UR3210: br label %do.end
+; UR3210: !0 = !{!"branch_weights", i32 1, i32 1}
+;
+; Now use a constant iteration count so that all non-final unrolled
+; iterations' latches unconditionally continue.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/1/ -e s/@MIN@/3/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG3230
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3230
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3230
+;
+; The new do.body contains 3 of the original loop's iterations, so multiply
+; it by 3, which is greater than the old do.body, which is impossibly low.
+; ORIG3230: - do.body: float = 2.0,
+; UR3230: - do.body: float = 1.0,
+;
+; UR3230: call void @f
+; UR3230-NOT: br
+; UR3230: call void @f
+; UR3230-NOT: br
+; UR3230: call void @f
+; UR3230: ret void
+;
+; Use a constant iteration count but now the loop upper bound computation can
+; overflow. When it does, the loop induction variable is greater than it
+; immediately, so the initial unrolled iteration's latch remains conditional.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/1/ -e s/@MIN@/3/ -e s/@I_0@/%x/ %s > %t.ll
+; RUN: %{bf-fc} ORIG323x
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR323x
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR323x
+;
+; The new do.body.1 contains 2 of the original loop's iterations, so
+; multiply it by 2, and add the new do.body to get the old do.body.
+; ORIG323x: - do.body: float = 2.0,
+; UR323x: - do.body: float = 1.0,
+; UR323x: - do.body.1: float = 0.5,
+;
+; UR323x: call void @f
+; UR323x: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR323x: call void @f
+; UR323x-NOT: br
+; UR323x: call void @f
+; UR323x: br label %do.end
+; UR323x: !0 = !{!"branch_weights", i32 1, i32 1}
+;
+; Original loop body frequency is 1 (loop weight 0).
+;
+; First use a variable iteration count so that all non-final unrolled
+; iterations' latches remain conditional.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/0/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG3110
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3110
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3110
+;
+; The sum of the new do.body* is approximately the old do.body.
+; ORIG3110: - do.body: float = 1.0,
+; UR3110: - do.body: float = 1.0,
+; UR3110: - do.body.1: float = 0.0{{(0000[0-9]*)?}},
+; UR3110: - do.body.2: float = 0.0{{(0000[0-9]*)?}},
+;
+; UR3110: call void @f
+; UR3110: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR3110: call void @f
+; UR3110: br i1 %{{.*}}, label %do.end, label %do.body.2, !prof !0
+; UR3110: call void @f
+; UR3110: br label %do.end
+; UR3110: !0 = !{!"branch_weights", i32 1, i32 0}
+;
+; Now use a constant iteration count so that all non-final unrolled
+; iterations' latches unconditionally continue.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/0/ -e s/@MIN@/3/ -e s/@I_0@/0/ %s > %t.ll
+; RUN: %{bf-fc} ORIG3130
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3130
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3130
+;
+; The new do.body contains 3 of the original loop's iterations, so multiply
+; it by 3, which is greater than the old do.body, which is impossibly low.
+; ORIG3130: - do.body: float = 1.0,
+; UR3130: - do.body: float = 1.0,
+;
+; UR3130: call void @f
+; UR3130-NOT: br
+; UR3130: call void @f
+; UR3130-NOT: br
+; UR3130: call void @f
+; UR3130: ret void
+;
+; Use a constant iteration count but now the loop upper bound computation can
+; overflow. When it does, the loop induction variable is greater than it
+; immediately, so the initial unrolled iteration's latch remains conditional.
+;
+; RUN: sed -e s/@MAX@/3/ -e s/@W@/0/ -e s/@MIN@/3/ -e s/@I_0@/%x/ %s > %t.ll
+; RUN: %{bf-fc} ORIG313x
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} UR313x
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR313x
+;
+; The new do.body.1 contains 2 of the original loop's iterations, so
+; multiply it by 2, and add the new do.body to get approximately the old
+; do.body.
+; ORIG313x: - do.body: float = 1.0,
+; UR313x: - do.body: float = 1.0,
+; UR313x: - do.body.1: float = 0.0{{(0000[0-9]*)?}},
+;
+; UR313x: call void @f
+; UR313x: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+; UR313x: call void @f
+; UR313x-NOT: br
+; UR313x: call void @f
+; UR313x: br label %do.end
+; UR313x: !0 = !{!"branch_weights", i32 1, i32 0}
+
+declare void @f(i32)
+
+define void @test(i32 %x, i32 %n) { ; test template: the RUN lines use sed to fill in the placeholders before opt runs
+entry:
+ %n.min = call i32 @llvm.umax.i32(i32 %n, i32 @MIN@) ; unsigned max: raise %n to at least the minimum iteration count
+ %n.minmax = call i32 @llvm.umin.i32(i32 %n.min, i32 @MAX@) ; unsigned min: cap it at the maximum iteration count
+ %i_n = add i32 @I_0@, %n.minmax ; loop upper bound = start value + clamped count; can wrap when the start value is %x
+ br label %do.body
+
+do.body:
+ %i = phi i32 [ @I_0@, %entry ], [ %inc, %do.body ] ; induction variable: start value on entry, %inc on the backedge
+ %inc = add i32 %i, 1
+ call void @f(i32 %i) ; stands for one original loop body; the checks count these calls per unrolled iteration
+ %c = icmp uge i32 %inc, %i_n ; unsigned compare: true once the incremented value reaches the upper bound
+ br i1 %c, label %do.end, label %do.body, !prof !0 ; true exits, false takes the backedge; !0 holds the exit and backedge weights
+
+do.end:
+ ret void
+}
+
+; Loop body frequency is @W@ + 1.
+!0 = !{!"branch_weights", i32 1, i32 @W@}
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll
index f5d05e666cabb..8b5a88bd6e8cd 100644
--- a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll
@@ -2,7 +2,14 @@
; frequencies after loop unrolling with an epilogue.
;
; We check various interesting unroll count values relative to the original
-; loop's body frequency of 11 (e.g., minimum and boundary values).
+; loop's body frequency of 11, and we check when the epilogue loop itself is and
+; is not unrolled.
+;
+; Without -unroll-remainder, the epilogue is unrolled only at -unroll-count=2
+; because there it has only 1 iteration and so is always completely unrolled.
+; With -unroll-remainder, for some reason related to computing the remainder in
+; two's complement, the epilogue is completely unrolled only when -unroll-count
+; is a power of 2.
;
; For each case, we check:
; - Iteration frequencies
@@ -14,13 +21,21 @@
; overlook any other branch weights (no extra !prof or branch_weights).
; - We also check the number of original loop bodies (represented by a call to
; @f) that appear within each unrolled iteration.
+; - Branch weight metadata
+; - Checking frequencies already checks whether the branch weights have the
+; expected effect, but we also want to check the following.
+; - Whether the epilogue loop is unrolled should not affect the unrolled
+; loop's estimated trip count or the branch weights on the unrolled loop
+; guard, unrolled loop latch, or epilogue loop guard.
+; - We get uniform probabilities/weights (same !prof) across the epilogue
+; iteration latches when expected.
; - llvm.loop.estimated_trip_count
-; - For the unrolled and epilogue loops, must be the number of iterations
+; - For the unrolled and epilogue loops, it must be the number of iterations
; required for the original loop body to reach its original estimated trip
; count, which is its original frequency, 11, because there is no prior
; llvm.loop.estimated_trip_count.
-; - Must not be blindly duplicated between the unrolled and epilogue loops.
-; - Must not be blindly computed from any new latch branch weights.
+; - It must not be blindly duplicated between the unrolled and epilogue loops.
+; - It must not be blindly computed from any new latch branch weights.
; ------------------------------------------------------------------------------
; Verify that the test code produces the original loop body frequency we expect.
@@ -45,6 +60,7 @@
; Check -unroll-count=2.
;
; RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2
+; RUN: %{ur-bf} -unroll-count=2 -unroll-remainder | %{fc} UR2
;
; Multiply do.body by 2 and add do.body.epil to get the original loop body
; frequency, 11.
@@ -72,22 +88,36 @@
; ------------------------------------------------------------------------------
; Check -unroll-count=4.
;
-; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR4
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} UR4,UR4-ELP
+; RUN: %{ur-bf} -unroll-count=4 -unroll-remainder | %{fc} UR4,UR4-EUR
;
-; Multiply do.body by 4 and add do.body.epil* to get the original loop body
-; frequency, 11.
-; UR4: - do.body: float = 2.3702,
-; UR4: - do.body.epil: float = 1.5193,
+; Multiply do.body by 4 and add do.body.epil* for either ELP or EUR to get the
+; original loop body frequency, 11.
+; UR4: - do.body: float = 2.3702,
+; UR4-ELP: - do.body.epil: float = 1.5193,
+; FIXME: Should sum to 1.5193:
+; UR4-EUR: - do.body.epil: float = 0.78453,
+; UR4-EUR: - do.body.epil.1: float = 0.37941,
+; UR4-EUR: - do.body.epil.2: float = 0.18349,
;
; Unrolled loop guard, body, and latch.
; UR4: br i1 %{{.*}}, label %do.body.epil.preheader, label %entry.new, !prof !0
; UR4-COUNT-4: call void @f
; UR4: br i1 %{{.*}}, label %do.end.unr-lcssa, label %do.body, !prof !1, !llvm.loop !2
;
-; Epilogue guard and loop.
+; Epilogue guard.
; UR4: br i1 %{{.*}}, label %do.body.epil.preheader, label %do.end, !prof !5
-; UR4: call void @f
-; UR4: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
+;
+; Non-unrolled epilogue loop.
+; UR4-ELP: call void @f
+; UR4-ELP: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
+;
+; Completely unrolled epilogue loop.
+; UR4-EUR: call void @f
+; UR4-EUR: br i1 %{{.*}}, label %do.body.epil.1, label %do.end.epilog-lcssa, !prof !6
+; UR4-EUR: call void @f
+; UR4-EUR: br i1 %{{.*}}, label %do.body.epil.2, label %do.end.epilog-lcssa, !prof !6
+; UR4-EUR: call void @f
;
; Unrolled loop metadata.
; UR4: !0 = !{!"branch_weights", i32 534047398, i32 1613436250}
@@ -97,30 +127,69 @@
; UR4: !4 = !{!"llvm.loop.unroll.disable"}
; UR4: !5 = !{!"branch_weights", i32 1531603292, i32 615880356}
;
-; Epilogue loop metadata.
-; UR4: !6 = !{!"branch_weights", i32 1038564635, i32 1108919013}
-; UR4: !7 = distinct !{!7, !8, !4}
-; UR4: !8 = !{!"llvm.loop.estimated_trip_count", i32 3}
+; Non-unrolled epilogue loop metadata.
+; UR4-ELP: !6 = !{!"branch_weights", i32 1038564635, i32 1108919013}
+; UR4-ELP: !7 = distinct !{!7, !8, !4}
+; UR4-ELP: !8 = !{!"llvm.loop.estimated_trip_count", i32 3}
+;
+; Completely unrolled epilogue loop metadata. Because it loses its backedge:
+; - The remaining conditional latches' branch weights must be adjusted relative
+; to the non-unrolled case. There are only two, so the implementation can
+; compute uniform branch weights using the quadratic formula.
+; - It has no llvm.loop.estimated_trip_count.
+; UR4-EUR: !6 = !{!"branch_weights", i32 1038564635, i32 1108919013}
; ------------------------------------------------------------------------------
; Check -unroll-count=10.
;
-; RUN: %{ur-bf} -unroll-count=10 | %{fc} UR10
-;
-; Multiply do.body by 8 and add do.body.epil* to get the original loop body
-; frequency, 11.
-; UR10: - do.body: float = 0.6902,
-; UR10: - do.body.epil: float = 4.098,
+; RUN: %{ur-bf} -unroll-count=10 | %{fc} UR10,UR10-ELP
+; RUN: %{ur-bf} -unroll-count=10 -unroll-remainder | %{fc} UR10,UR10-EUR
+;
+; Multiply do.body by 10 and add do.body.epil* for either ELP or EUR to get the
+; original loop body frequency, 11.
+; UR10: - do.body: float = 0.6902,
+; UR10-ELP: - do.body.epil: float = 4.098,
+; UR10-EUR: - do.body.epil: float = 1.0375,
+; UR10-EUR: - do.body.epil.1: float = 0.80019,
+; UR10-EUR: - do.body.epil.2: float = 0.61718,
+; UR10-EUR: - do.body.epil.3: float = 0.47602,
+; UR10-EUR: - do.body.epil.4: float = 0.36715,
+; UR10-EUR: - do.body.epil.5: float = 0.28318,
+; UR10-EUR: - do.body.epil.6: float = 0.21841,
+; UR10-EUR: - do.body.epil.7: float = 0.16846,
+; UR10-EUR: - do.body.epil.8: float = 0.12993,
;
; Unrolled loop guard, body, and latch.
; UR10: br i1 %{{.*}}, label %do.body.epil.preheader, label %entry.new, !prof !0
; UR10-COUNT-10: call void @f
; UR10: br i1 %{{.*}}, label %do.end.unr-lcssa, label %do.body, !prof !1, !llvm.loop !2
;
-; Epilogue guard and loop.
+; Epilogue guard.
; UR10: br i1 %{{.*}}, label %do.body.epil.preheader, label %do.end, !prof !5
-; UR10: call void @f
-; UR10: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
+;
+; Non-unrolled epilogue loop.
+; UR10-ELP: call void @f
+; UR10-ELP: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
+;
+; Partially unrolled epilogue loop.
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil.1, label %do.end.epilog-lcssa, !prof !6
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil.2, label %do.end.epilog-lcssa, !prof !6
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil.3, label %do.end.epilog-lcssa, !prof !6
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil.4, label %do.end.epilog-lcssa, !prof !6
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil.5, label %do.end.epilog-lcssa, !prof !6
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil.6, label %do.end.epilog-lcssa, !prof !6
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil.7, label %do.end.epilog-lcssa, !prof !6
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil.8, label %do.end.epilog-lcssa, !prof !6
+; UR10-EUR: call void @f
+; UR10-EUR: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
;
; Unrolled loop metadata.
; UR10: !0 = !{!"branch_weights", i32 1236740947, i32 910742701}
@@ -130,30 +199,69 @@
; UR10: !4 = !{!"llvm.loop.unroll.disable"}
; UR10: !5 = !{!"branch_weights", i32 1829762672, i32 317720976}
;
-; Epilogue loop metadata. Its llvm.loop.estimated_trip_count happens to be the
-; same as the unrolled loop's, so there's no new metadata node.
-; UR10: !6 = !{!"branch_weights", i32 1656332913, i32 491150735}
-; UR10: !7 = distinct !{!7, ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; The unrolled epilogue loop does not lose any conditional branches, so:
+; - The non-unrolled epilogue branch weights are shared across them.
+; - This is our first case where the unrolled epilogue loop has an
+; llvm.loop.estimated_trip_count. However, it happens to be the same as the
+; unrolled loop's, so there's no new metadata node.
+; UR10: !6 = !{!"branch_weights", i32 1656332913, i32 491150735}
+; UR10-ELP: !7 = distinct !{!7, !3, !4}
+; UR10-EUR: !7 = distinct !{!7, !3}
; ------------------------------------------------------------------------------
; Check -unroll-count=11.
;
-; RUN: %{ur-bf} -unroll-count=11 | %{fc} UR11
-;
-; Multiply do.body by 11 and add do.body.epil* to get the original loop body
-; frequency, 11.
-; UR11: - do.body: float = 0.59359,
-; UR11: - do.body.epil: float = 4.4705,
+; RUN: %{ur-bf} -unroll-count=11 | %{fc} UR11,UR11-ELP
+; RUN: %{ur-bf} -unroll-count=11 -unroll-remainder | %{fc} UR11,UR11-EUR
+;
+; Multiply do.body by 11 and add do.body.epil* for either ELP or EUR to get the
+; original loop body frequency, 11.
+; UR11: - do.body: float = 0.59359,
+; UR11-ELP: - do.body.epil: float = 4.4705,
+; UR11-EUR: - do.body.epil: float = 1.0428,
+; UR11-EUR: - do.body.epil.1: float = 0.82209,
+; UR11-EUR: - do.body.epil.2: float = 0.64812,
+; UR11-EUR: - do.body.epil.3: float = 0.51097,
+; UR11-EUR: - do.body.epil.4: float = 0.40284,
+; UR11-EUR: - do.body.epil.5: float = 0.31759,
+; UR11-EUR: - do.body.epil.6: float = 0.25038,
+; UR11-EUR: - do.body.epil.7: float = 0.1974,
+; UR11-EUR: - do.body.epil.8: float = 0.15562,
+; UR11-EUR: - do.body.epil.9: float = 0.12269,
;
; Unrolled loop guard, body, and latch.
; UR11: br i1 %{{.*}}, label %do.body.epil.preheader, label %entry.new, !prof !0
; UR11-COUNT-11: call void @f
; UR11: br i1 %{{.*}}, label %do.end.unr-lcssa, label %do.body, !prof !1, !llvm.loop !2
-
-; Epilogue guard and loop.
+;
+; Epilogue guard.
; UR11: br i1 %{{.*}}, label %do.body.epil.preheader, label %do.end, !prof !5
-; UR11: call void @f
-; UR11: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
+;
+; Non-unrolled epilogue loop.
+; UR11-ELP: call void @f
+; UR11-ELP: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
+;
+; Partially unrolled epilogue loop.
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.1, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.2, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.3, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.4, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.5, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.6, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.7, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.8, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil.9, label %do.end.epilog-lcssa, !prof !6
+; UR11-EUR: call void @f
+; UR11-EUR: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
;
; Unrolled loop metadata.
; UR11: !0 = !{!"branch_weights", i32 1319535738, i32 827947910}
@@ -163,30 +271,74 @@
; UR11: !4 = !{!"llvm.loop.unroll.disable"}
; UR11: !5 = !{!"branch_weights", i32 1846907894, i32 300575754}
;
-; Epilogue loop metadata.
-; UR11: !6 = !{!"branch_weights", i32 1693034047, i32 454449601}
-; UR11: !7 = distinct !{!7, !8, !4}
-; UR11: !8 = !{!"llvm.loop.estimated_trip_count", i32 0}
+; The unrolled epilogue loop does not lose any conditional branches, so:
+; - The non-unrolled epilogue branch weights are shared across them.
+; - The unrolled epilogue loop has an llvm.loop.estimated_trip_count. This is
+; our first case where it is different than the unrolled loop's, so it has its
+; own metadata node. But it happens to be the same as the non-unrolled
+; epilogue loop's.
+; UR11: !6 = !{!"branch_weights", i32 1693034047, i32 454449601}
+; UR11-ELP: !7 = distinct !{!7, !8, !4}
+; UR11-EUR: !7 = distinct !{!7, !8}
+; UR11: !8 = !{!"llvm.loop.estimated_trip_count", i32 0}
; ------------------------------------------------------------------------------
; Check -unroll-count=12.
;
-; RUN: %{ur-bf} -unroll-count=12 | %{fc} UR12
-;
-; Multiply do.body by 12 and add do.body.epil* to get the original loop body
-; frequency, 11.
-; UR12: - do.body: float = 0.5144,
-; UR12: - do.body.epil: float = 4.8272,
+; RUN: %{ur-bf} -unroll-count=12 | %{fc} UR12,UR12-ELP
+; RUN: %{ur-bf} -unroll-count=12 -unroll-remainder | %{fc} UR12,UR12-EUR
+;
+; Multiply do.body by 12 and add do.body.epil* for either ELP or EUR to get the
+; original loop body frequency, 11.
+; UR12: - do.body: float = 0.5144,
+; UR12-ELP: - do.body.epil: float = 4.8272,
+; UR12-EUR: - do.body.epil: float = 1.0463,
+; UR12-EUR: - do.body.epil.1: float = 0.83968,
+; UR12-EUR: - do.body.epil.2: float = 0.67387,
+; UR12-EUR: - do.body.epil.3: float = 0.5408,
+; UR12-EUR: - do.body.epil.4: float = 0.43401,
+; UR12-EUR: - do.body.epil.5: float = 0.3483,
+; UR12-EUR: - do.body.epil.6: float = 0.27952,
+; UR12-EUR: - do.body.epil.7: float = 0.22433,
+; UR12-EUR: - do.body.epil.8: float = 0.18003,
+; UR12-EUR: - do.body.epil.9: float = 0.14448,
+; UR12-EUR: - do.body.epil.10: float = 0.11595,
;
; Unrolled loop guard, body, and latch.
; UR12: br i1 %{{.*}}, label %do.body.epil.preheader, label %entry.new, !prof !0
; UR12-COUNT-12: call void @f
; UR12: br i1 %{{.*}}, label %do.end.unr-lcssa, label %do.body, !prof !1, !llvm.loop !2
;
-; Epilogue guard and loop.
+; Epilogue guard.
; UR12: br i1 %{{.*}}, label %do.body.epil.preheader, label %do.end, !prof !5
-; UR12: call void @f
-; UR12: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
+;
+; Non-unrolled epilogue loop.
+; UR12-ELP: call void @f
+; UR12-ELP: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
+;
+; Partially unrolled epilogue loop.
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.1, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.2, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.3, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.4, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.5, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.6, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.7, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.8, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.9, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil.10, label %do.end.epilog-lcssa, !prof !6
+; UR12-EUR: call void @f
+; UR12-EUR: br i1 %{{.*}}, label %do.body.epil, label %do.end.epilog-lcssa, !prof !6, !llvm.loop !7
;
; Unrolled loop metadata.
; UR12: !0 = !{!"branch_weights", i32 1394803730, i32 752679918}
@@ -196,10 +348,16 @@
; UR12: !4 = !{!"llvm.loop.unroll.disable"}
; UR12: !5 = !{!"branch_weights", i32 1860963812, i32 286519836}
;
-; Epilogue loop metadata.
-; UR12: !6 = !{!"branch_weights", i32 1723419551, i32 424064097}
-; UR12: !7 = distinct !{!7, !8, !4}
-; UR12: !8 = !{!"llvm.loop.estimated_trip_count", i32 11}
+; The unrolled epilogue loop does not lose any conditional branches, so:
+; - The non-unrolled epilogue branch weights are shared across them.
+; - The unrolled epilogue loop has an llvm.loop.estimated_trip_count. This is
+; our first case where it is different than both the unrolled loop's and the
+; non-unrolled epilogue loop's, so they all have distinct metadata nodes.
+; UR12: !6 = !{!"branch_weights", i32 1723419551, i32 424064097}
+; UR12-ELP: !7 = distinct !{!7, !8, !4}
+; UR12-ELP: !8 = !{!"llvm.loop.estimated_trip_count", i32 11}
+; UR12-EUR: !7 = distinct !{!7, !8}
+; UR12-EUR: !8 = !{!"llvm.loop.estimated_trip_count", i32 1}
declare void @f(i32)
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial-unconditional-latch.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial-unconditional-latch.ll
new file mode 100644
index 0000000000000..dafb2a3ca4ed9
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial-unconditional-latch.ll
@@ -0,0 +1,280 @@
+; Test branch weight metadata, estimated trip count metadata, and block
+; frequencies after partial loop unrolling without -unroll-runtime such that
+; some iterations' latches become unconditional, which often contradicts the
+; original branch weights.
+;
+; (unroll-complete.ll tests complete loop unrolling, in which the final unrolled
+; iteration unconditionally exits (backedge removed). Here, we include cases
+; where the final iteration's latch unconditionally continues instead.)
+;
+; For each case, we check:
+; - Iteration frequencies
+; - When each is multiplied by the number of original loop bodies that execute
+; within it, they should sum to almost exactly the original loop body
+; frequency.
+; - The only exception is an impossibly high or low original frequency (e.g.,
+; due to bad profile data), for which there exist no new branch weights that
+; can yield that frequency sum. In those cases, we expect the maximum or
+; minimum possible frequency.
+; - CFGs
+; - We verify which branch weights go with which branches and that we did not
+; overlook any other branch weights (no extra !prof or branch_weights).
+; - We also check the number of original loop bodies (represented by a call to
+; @f) that appear within each unrolled iteration.
+; - Branch weight metadata
+; - Checking frequencies already checks whether the branch weights have the
+; expected effect, but we also want to check that we get uniform
+; probabilities/weights (same !prof) across the unrolled iteration latches
+; when expected.
+; - llvm.loop.estimated_trip_count
+; - It must be the number of iterations of the unrolled loop required for the
+; original loop body to reach its original frequency.
+; - It must not be blindly computed from any new latch branch weights.
+
+; ------------------------------------------------------------------------------
+; Define LIT substitutions.
+;
+; For verifying that the test code produces the original loop body frequency we
+; expect.
+; DEFINE: %{bf-fc} = opt %t.ll -S -passes='print<block-freq>' 2>&1 | \
+; DEFINE: FileCheck %s -check-prefixes
+;
+; For checking the unrolled loop:
+; DEFINE: %{ur-bf} = opt %t.ll -S -passes='loop-unroll,print<block-freq>' 2>&1
+; DEFINE: %{fc} = FileCheck %s \
+; DEFINE: -implicit-check-not='llvm.loop.estimated_trip_count' \
+; DEFINE: -implicit-check-not='!prof' \
+; DEFINE: -implicit-check-not='branch_weights' \
+; DEFINE: -implicit-check-not='call void @f' -check-prefixes
+
+; ------------------------------------------------------------------------------
+; Check cases when the original loop's number of iterations is a run-time
+; determined multiple of 10 and the original loop body frequency is 10.
+;
+; RUN: sed -e s/@N@/%mul10/ -e s/@W@/9/ %s > %t.ll
+;
+; At compile time, the possible values of that count always include
+; unroll count x 10 x N for any integer N >= 1, so the unrolled loop's backedge
+; always remains conditional here. Cases where the backedge becomes
+; unconditional are checked later in this test file with the CONST4 config.
+;
+; Check the original loop body frequency.
+;
+; RUN: %{bf-fc} MULT-ORIG
+; MULT-ORIG: - do.body: float = 10.0,
+;
+; When the unroll count is odd, every iteration's latch remains conditional, so
+; their original probabilities are not contradicted. That is, the original loop
+; latch's branch weights remain on all unrolled iterations' latches.
+;
+; RUN: %{ur-bf} -unroll-count=3 | %{fc} MULT3
+;
+; Sums to approximately the original loop body frequency, 10.
+; MULT3: - do.body: float = 3.69,
+; MULT3: - do.body.1: float = 3.321,
+; MULT3: - do.body.2: float = 2.9889,
+;
+; MULT3: call void @f
+; MULT3: br i1 %{{.*}}, label %do.body.1, label %do.end, !prof !0
+; MULT3: call void @f
+; MULT3: br i1 %{{.*}}, label %do.body.2, label %do.end, !prof !0
+; MULT3: call void @f
+; MULT3: br i1 %{{.*}}, label %do.body, label %do.end, !prof !0, !llvm.loop !1
+;
+; MULT3: !0 = !{!"branch_weights", i32 9, i32 1}
+; MULT3: !1 = distinct !{!1, !2, !3}
+; MULT3: !2 = !{!"llvm.loop.estimated_trip_count", i32 4}
+; MULT3: !3 = !{!"llvm.loop.unroll.disable"}
+;
+; When the unroll count is even, odd-numbered unrolled iterations become
+; unconditional, so branch weights must be adjusted.
+;
+; -unroll-count=2, so there is 1 remaining conditional latch, so the
+; implementation can compute uniform weights by solving a linear equation.
+;
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} MULT2
+;
+; Multiply by 2 to get the original loop body frequency, 10.
+; FIXME: Should sum to 5.0:
+; MULT2: - do.body: float = 10.0,
+;
+; MULT2: call void @f
+; MULT2-NOT: br
+; MULT2: call void @f
+; MULT2: br i1 %{{.*}}, label %do.body, label %do.end, !prof !0, !llvm.loop !1{{$}}
+;
+; The branch weights imply the estimated trip count is
+; (1717986918+429496730)/429496730 = approximately (8+2)/2 = 5.
+; FIXME: Or at least they should.
+; MULT2: !0 = !{!"branch_weights", i32 9, i32 1}
+; MULT2: !1 = distinct !{!1, !2, !3}
+; MULT2: !2 = !{!"llvm.loop.estimated_trip_count", i32 5}
+; MULT2: !3 = !{!"llvm.loop.unroll.disable"}
+;
+; -unroll-count=4, so there are 2 remaining conditional latches, so the
+; implementation can compute uniform weights using the quadratic formula.
+;
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} MULT4
+;
+; Multiply by 2 and sum to get the original loop body frequency, 10.
+; FIXME: Should sum to 5.0:
+; MULT4: - do.body: float = 5.2632,
+; MULT4: - do.body.2: float = 4.7368,
+;
+; MULT4: call void @f
+; MULT4-NOT: br
+; MULT4: call void @f
+; MULT4: br i1 %{{.*}}, label %do.body.2, label %do.end, !prof !0
+; MULT4: call void @f
+; MULT4-NOT: br
+; MULT4: call void @f
+; MULT4: br i1 %{{.*}}, label %do.body, label %do.end, !prof !0, !llvm.loop !1
+;
+; MULT4 is like applying -unroll-count=2 to MULT2 without converting any
+; more conditional latches to unconditional, so MULT2's branch weights work.
+; MULT4: !0 = !{!"branch_weights", i32 9, i32 1}
+; MULT4: !1 = distinct !{!1, !2, !3}
+; MULT4: !2 = !{!"llvm.loop.estimated_trip_count", i32 3}
+; MULT4: !3 = !{!"llvm.loop.unroll.disable"}
+
+; ------------------------------------------------------------------------------
+; Check case when the original loop's number of iterations is a run-time
+; determined multiple of 10, the unroll count is even so that odd-numbered
+; unrolled iterations become unconditional, and the original loop body frequency
+; is 1, which is impossibly low. This case is important to ensure the
+; implementation does not malfunction by trying to use negative and possibly
+; infinite probabilities to reach the original loop body frequency.
+;
+; RUN: sed -e s/@N@/%mul10/ -e s/@W@/0/ %s > %t.ll
+;
+; Check the original loop body frequency.
+;
+; RUN: %{bf-fc} LOW-ORIG
+; LOW-ORIG: - do.body: float = 1.0,
+;
+; -unroll-count=2, so there is 1 remaining conditional latch. The
+; implementation tries to compute uniform weights by solving a linear equation
+; but ultimately sets the latch's probability to zero.
+;
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} LOW2
+;
+; Multiply by 2, but the result is greater than the original loop body
+; frequency, 1, which is impossibly low.
+; LOW2: - do.body: float = 1.0,
+;
+; LOW2: call void @f
+; LOW2-NOT: br
+; LOW2: call void @f
+; LOW2: br i1 %{{.*}}, label %do.body, label %do.end, !prof !0, !llvm.loop !1{{$}}
+;
+; LOW2: !0 = !{!"branch_weights", i32 0, i32 1}
+; LOW2: !1 = distinct !{!1, !2, !3}
+; LOW2: !2 = !{!"llvm.loop.estimated_trip_count", i32 1}
+; LOW2: !3 = !{!"llvm.loop.unroll.disable"}
+;
+; -unroll-count=4, so there are 2 remaining conditional latches. The
+; implementation tries to compute uniform weights using the quadratic formula
+; but ultimately sets both latches' probabilities to zero.
+;
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} LOW4
+;
+; Multiply by 2 and sum, but the result is greater than the original loop body
+; frequency, 1, which is impossibly low.
+; LOW4: - do.body: float = 1.0,
+; LOW4: - do.body.2: float = 0.0{{(0000[0-9]*)?}},
+;
+; LOW4: call void @f
+; LOW4-NOT: br
+; LOW4: call void @f
+; LOW4: br i1 %{{.*}}, label %do.body.2, label %do.end, !prof !0
+; LOW4: call void @f
+; LOW4-NOT: br
+; LOW4: call void @f
+; LOW4: br i1 %{{.*}}, label %do.body, label %do.end, !prof !0, !llvm.loop !1
+;
+; LOW4: !0 = !{!"branch_weights", i32 0, i32 1}
+; LOW4: !1 = distinct !{!1, !2, !3}
+; LOW4: !2 = !{!"llvm.loop.estimated_trip_count", i32 1}
+; LOW4: !3 = !{!"llvm.loop.unroll.disable"}
+
+; ------------------------------------------------------------------------------
+; Check cases when the original loop's number of iterations is a constant 10 and
+; the original loop body frequency is 10.
+;
+; RUN: sed -e s/@N@/10/g -e s/@W@/9/ %s > %t.ll
+;
+; Because we test only partial unrolling, there is always exactly one unrolled
+; iteration that can possibly exit, so only its latch can remain conditional.
+; Because there is only one, its branch weights can be computed with a simple
+; formula.
+;
+; Check the original loop body frequency.
+;
+; RUN: %{bf-fc} CONST-ORIG
+; CONST-ORIG: - do.body: float = 10.0,
+;
+; Check when the unrolled loop's backedge remains conditional.
+;
+; RUN: %{ur-bf} -unroll-count=2 | %{fc} CONST2
+;
+; Multiply by 2 to get the original loop body frequency, 10.
+; FIXME: Should be 5.0:
+; CONST2: - do.body: float = 10.0,
+;
+; CONST2: call void @f
+; CONST2-NOT: br
+; CONST2: call void @f
+; CONST2: br i1 %{{.*}}, label %do.body, label %do.end, !prof !0, !llvm.loop !1
+;
+; Like MULT2.
+; CONST2: !0 = !{!"branch_weights", i32 9, i32 1}
+; CONST2: !1 = distinct !{!1, !2, !3}
+; CONST2: !2 = !{!"llvm.loop.estimated_trip_count", i32 5}
+; CONST2: !3 = !{!"llvm.loop.unroll.disable"}
+;
+; Check when the unrolled loop's backedge unconditionally continues.
+;
+; RUN: %{ur-bf} -unroll-count=4 | %{fc} CONST4
+;
+; Multiply by 2 and sum to get the original loop body frequency, 10.
+; FIXME: Should sum to 5.0:
+; CONST4: - do.body: float = 10.0,
+; CONST4: - do.body.2: float = 9.0,
+;
+; CONST4: call void @f
+; CONST4-NOT: br
+; CONST4: call void @f
+; CONST4: br i1 %{{.*}}, label %do.body.2, label %do.end, !prof !0
+; CONST4: call void @f
+; CONST4-NOT: br
+; CONST4: call void @f
+; CONST4: br label %do.body, !llvm.loop !1
+;
+; There is no llvm.loop.estimated_trip_count because the unrolled loop's latch
+; in do.body.2 unconditionally continues. The branch weights on do.body's
+; branch imply do.body continues twice and then exits once, thus executing the
+; original loop body 10 times.
+; CONST4: !0 = !{!"branch_weights", i32 9, i32 1}
+; CONST4: !1 = distinct !{!1, !2}
+; CONST4: !2 = !{!"llvm.loop.unroll.disable"}
+
+declare void @f(i32)
+
+define void @test(i32 %n) { ; template loop; placeholders are filled in by sed before opt runs
+entry:
+ %mul10 = mul i32 %n, 10 ; run-time multiple of 10, used as the trip count by some configs
+ br label %do.body
+
+do.body:
+ %i = phi i32 [ 0, %entry ], [ %next, %do.body ] ; counts iterations from 0
+ call void @f(i32 %i) ; stands for one original loop body in the unrolled output
+ %next = add i32 %i, 1
+ %c = icmp ne i32 %next, @N@ ; compares against the substituted trip count
+ br i1 %c, label %do.body, label %do.end, !prof !0 ; latch; weights are defined in !0 below
+
+do.end:
+ ret void
+}
+
+; Loop body frequency is @W@ + 1.
+!0 = !{!"branch_weights", i32 @W@, i32 1}
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll
index af5342c5e35cd..ea6f4a4180fc9 100644
--- a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll
@@ -1,5 +1,6 @@
; Test branch weight metadata, estimated trip count metadata, and block
-; frequencies after partial loop unrolling without -unroll-runtime.
+; frequencies after partial loop unrolling without -unroll-runtime and without
+; converting any iteration's latch to an unconditional branch.
; ------------------------------------------------------------------------------
; RUN: opt < %s -S -passes='print<block-freq>' 2>&1 | \
diff --git a/llvm/test/Transforms/LoopUnroll/loop-probability-one.ll b/llvm/test/Transforms/LoopUnroll/loop-probability-one.ll
index 14f6da42df6b1..89915d29f5921 100644
--- a/llvm/test/Transforms/LoopUnroll/loop-probability-one.ll
+++ b/llvm/test/Transforms/LoopUnroll/loop-probability-one.ll
@@ -1,73 +1,97 @@
; Check that a loop probability of one (indicating an always infinite loop) does
; not crash or otherwise break LoopUnroll behavior when it tries to compute new
; probabilities from it.
-;
-; That case indicates an always infinite loop. A remainder loop cannot be
-; calculated at run time when the original loop is infinite as infinity %
-; UnrollCount is undefined, so consistent remainder loop probabilities are
-; difficult or impossible to reason about. The implementation chooses
-; probabilities indicating that all remainder loop iterations will always
-; execute.
-
-; DEFINE: %{unroll} = opt < %s -unroll-count=3 -passes=loop-unroll -S
-; DEFINE: %{rt} = %{unroll} -unroll-runtime
-
-; RUN: %{unroll} | FileCheck %s -check-prefix UNROLL
-; RUN: %{rt} -unroll-runtime-epilog=true | FileCheck %s -check-prefix EPILOG
-; RUN: %{rt} -unroll-runtime-epilog=false | FileCheck %s -check-prefix PROLOG
-
-define void @test(i32 %n) {
-entry:
- br label %loop
-loop:
- %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
- %inc = add i32 %i, 1
- %c = icmp slt i32 %inc, %n
- br i1 %c, label %loop, label %end, !prof !0
+; DEFINE: %{unroll} = opt < %t.ll -unroll-count=3 -passes=loop-unroll -S
+; DEFINE: %{fc} = FileCheck %s \
+; DEFINE: -implicit-check-not='llvm.loop.estimated_trip_count' \
+; DEFINE: -implicit-check-not='!prof' \
+; DEFINE: -implicit-check-not='branch_weights' \
+; DEFINE: -implicit-check-not='call void @f' -check-prefixes
-end:
- ret void
-}
+; ------------------------------------------------------------------------------
+; A partially unrolled loop remains infinite.
+;
+; RUN: sed -e s/@N@/%n/ %s > %t.ll
+; RUN: %{unroll} | %{fc} PART-ALL-COND
+;
+; PART-ALL-COND: call void @f
+; PART-ALL-COND: br i1 %{{.*}}, label %loop.1, label %end, !prof !0
+; PART-ALL-COND: call void @f
+; PART-ALL-COND: br i1 %{{.*}}, label %loop.2, label %end, !prof !0
+; PART-ALL-COND: call void @f
+; PART-ALL-COND: br i1 %{{.*}}, label %loop, label %end, !prof !0, !llvm.loop !1
+; PART-ALL-COND: !0 = !{!"branch_weights", i32 1, i32 0}
+; ------------------------------------------------------------------------------
+; A partially unrolled loop remains infinite even if some iterations' latches
+; become unconditional.
+;
+; RUN: sed -e s/@N@/5/ %s > %t.ll
+; RUN: %{unroll} | %{fc} PART-SOME-COND
+;
+; PART-SOME-COND: call void @f
+; PART-SOME-COND-NOT: br
+; PART-SOME-COND: call void @f
+; PART-SOME-COND: br i1 %{{.*}}, label %loop.2, label %end, !prof !0
+; PART-SOME-COND: call void @f
+; PART-SOME-COND: br label %loop, !llvm.loop !1
+; PART-SOME-COND: !0 = !{!"branch_weights", i32 1, i32 0}
-!0 = !{!"branch_weights", i32 1, i32 0}
+; ------------------------------------------------------------------------------
+; A completely unrolled loop cannot be infinite, so consistent unrolled loop
+; probabilities are impossible. The implementation chooses probabilities
+; indicating that all unrolled loop iterations will always execute.
+;
+; RUN: sed -e s/@N@/%max3/ %s > %t.ll
+; RUN: %{unroll} | %{fc} COMPLETE-SOME-COND
+;
+; COMPLETE-SOME-COND: call void @f
+; COMPLETE-SOME-COND: br i1 %{{.*}}, label %loop.1, label %end, !prof !0
+; COMPLETE-SOME-COND: call void @f
+; COMPLETE-SOME-COND: br i1 %{{.*}}, label %loop.2, label %end, !prof !0
+; COMPLETE-SOME-COND: call void @f
+; COMPLETE-SOME-COND: br label %end
+; COMPLETE-SOME-COND: !0 = !{!"branch_weights", i32 1, i32 0}
-; UNROLL: define void @test(i32 %n) {
-; UNROLL: entry:
-; UNROLL: br label %loop
-; UNROLL: loop:
-; UNROLL: br i1 %c, label %loop.1, label %end, !prof !0
-; UNROLL: loop.1:
-; UNROLL: br i1 %c.1, label %loop.2, label %end, !prof !0
-; UNROLL: loop.2:
-; UNROLL: br i1 %c.2, label %loop, label %end, !prof !0, !llvm.loop !1
-; UNROLL-NOT: loop.3
-; UNROLL: end:
-; UNROLL: ret void
-; UNROLL: }
-;
-; Infinite unrolled loop.
-; UNROLL: !0 = !{!"branch_weights", i32 1, i32 0}
+; ------------------------------------------------------------------------------
+; A completely unrolled loop with no remaining conditional latches gives the
+; implementation no probabilities to set. Check that it still behaves.
+;
+; RUN: sed -e s/@N@/3/ %s > %t.ll
+; RUN: %{unroll} | %{fc} COMPLETE-NO-COND
+;
+; COMPLETE-NO-COND: call void @f
+; COMPLETE-NO-COND-NOT: br
+; COMPLETE-NO-COND: call void @f
+; COMPLETE-NO-COND-NOT: br
+; COMPLETE-NO-COND: call void @f
-; EPILOG: define void @test(i32 %n) {
-; EPILOG: entry:
-; EPILOG: br i1 %{{.*}}, label %loop.epil.preheader, label %entry.new, !prof !0
-; EPILOG: entry.new:
-; EPILOG: br label %loop
-; EPILOG: loop:
-; EPILOG: br i1 %{{.*}}, label %loop, label %end.unr-lcssa, !prof !1
-; EPILOG: end.unr-lcssa:
-; EPILOG: br i1 %{{.*}}, label %loop.epil.preheader, label %end, !prof !1
-; EPILOG: loop.epil.preheader:
-; EPILOG: br label %loop.epil
-; EPILOG: loop.epil:
-; EPILOG: br i1 %{{.*}}, label %loop.epil, label %end.epilog-lcssa, !prof !4
-; EPILOG: end.epilog-lcssa:
-; EPILOG: br label %end
-; EPILOG: end:
-; EPILOG: ret void
-; EPILOG: }
+; ------------------------------------------------------------------------------
+; A remainder loop cannot be calculated at run time when the original loop is
+; infinite as infinity % UnrollCount is undefined, so consistent remainder loop
+; probabilities are difficult or impossible to reason about. The implementation
+; chooses probabilities indicating that all remainder loop iterations will
+; always execute.
+;
+; RUN: sed -e s/@N@/%n/ %s > %t.ll
+; DEFINE: %{rt} = %{unroll} -unroll-runtime
+; RUN: %{rt} -unroll-runtime-epilog=true | %{fc} EPILOG
+; RUN: %{rt} -unroll-runtime-epilog=false | %{fc} PROLOG
+;
+; Unrolled loop guard, body, and latch.
+; EPILOG: br i1 %{{.*}}, label %loop.epil.preheader, label %entry.new, !prof !0
+; EPILOG: call void @f
+; EPILOG-NOT: br
+; EPILOG: call void @f
+; EPILOG-NOT: br
+; EPILOG: call void @f
+; EPILOG: br i1 %{{.*}}, label %loop, label %end.unr-lcssa, !prof !1
+;
+; Epilogue guard, body, and latch.
+; EPILOG: br i1 %{{.*}}, label %loop.epil.preheader, label %end, !prof !1
+; EPILOG: call void @f
+; EPILOG: br i1 %{{.*}}, label %loop.epil, label %end.epilog-lcssa, !prof !4
;
; Unrolled loop guard: Unrolled loop is always entered.
; EPILOG: !0 = !{!"branch_weights", i32 0, i32 -2147483648}
@@ -78,27 +102,20 @@ end:
;
; Epilogue loop latch: Epilogue loop executes both of its 2 iterations.
; EPILOG: !4 = !{!"branch_weights", i32 1073741824, i32 1073741824}
-
-; PROLOG: define void @test(i32 %n) {
-; PROLOG: entry:
-; PROLOG: br i1 %{{.*}}, label %loop.prol.preheader, label %loop.prol.loopexit, !prof !0
-; PROLOG: loop.prol.preheader:
-; PROLOG: br label %loop.prol
-; PROLOG: loop.prol:
-; PROLOG: br i1 %{{.*}}, label %loop.prol, label %loop.prol.loopexit.unr-lcssa, !prof !1
-; PROLOG: loop.prol.loopexit.unr-lcssa:
-; PROLOG: br label %loop.prol.loopexit
-; PROLOG: loop.prol.loopexit:
-; PROLOG: br i1 %{{.*}}, label %end, label %entry.new, !prof !0
-; PROLOG: entry.new:
-; PROLOG: br label %loop
-; PROLOG: loop:
-; PROLOG: br i1 %{{.*}}, label %loop, label %end.unr-lcssa, !prof !4
-; PROLOG: end.unr-lcssa:
-; PROLOG: br label %end
-; PROLOG: end:
-; PROLOG: ret void
-; PROLOG: }
+;
+; Prologue guard, body, and latch.
+; PROLOG: br i1 %{{.*}}, label %loop.prol.preheader, label %loop.prol.loopexit, !prof !0
+; PROLOG: call void @f
+; PROLOG: br i1 %{{.*}}, label %loop.prol, label %loop.prol.loopexit.unr-lcssa, !prof !1
+;
+; Unrolled loop guard, body, and latch.
+; PROLOG: br i1 %{{.*}}, label %end, label %entry.new, !prof !0
+; PROLOG: call void @f
+; PROLOG-NOT: br
+; PROLOG: call void @f
+; PROLOG-NOT: br
+; PROLOG: call void @f
+; PROLOG: br i1 %{{.*}}, label %loop, label %end.unr-lcssa, !prof !4
;
; FIXME: Branch weights still need to be fixed in the case of prologues (issue
; #135812), so !0 and !1 do not yet match their comments below. When we do
@@ -114,3 +131,23 @@ end:
;
; Unrolled loop latch: Unrolled loop is infinite.
; PROLOG: !4 = !{!"branch_weights", i32 1, i32 0}
+
+declare void @f(i32)
+
+define void @test(i32 %n) { ; template loop; the bound placeholder is filled in by sed
+entry:
+ %max3 = call i32 @llvm.umin.i32(i32 %n, i32 3) ; unsigned min of %n and 3; one config uses it as the bound
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ] ; counts iterations from 0
+ call void @f(i32 %i) ; marks one copy of the original loop body
+ %inc = add i32 %i, 1
+ %c = icmp slt i32 %inc, @N@ ; compares against the substituted bound
+ br i1 %c, label %loop, label %end, !prof !0 ; latch; !0 weights are 1 (continue) to 0 (exit)
+
+end:
+ ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 0}
More information about the llvm-branch-commits
mailing list