[llvm-branch-commits] [llvm] [LoopUnroll] Fix freqs for unconditional latches: introduce tests (PR #191008)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Apr 8 09:30:50 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Joel E. Denny (jdenny-ornl)

<details>
<summary>Changes</summary>

This patch introduces all tests for PR #<!-- -->179520 but with current results so that it is easier to see which results PR #<!-- -->179520 improves. This patch should not land without PR #<!-- -->179520.

---

Patch is 60.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/191008.diff


5 Files Affected:

- (added) llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-complete.ll (+530) 
- (modified) llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll (+214-56) 
- (added) llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial-unconditional-latch.ll (+280) 
- (modified) llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll (+2-1) 
- (modified) llvm/test/Transforms/LoopUnroll/loop-probability-one.ll (+119-82) 


``````````diff
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-complete.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-complete.ll
new file mode 100644
index 0000000000000..fd7df00515e25
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-complete.ll
@@ -0,0 +1,530 @@
+; Test branch weight metadata, estimated trip count metadata, and block
+; frequencies after complete loop unrolling.  The final unrolled iteration
+; unconditionally exits (backedge removed), and other unrolled iterations'
+; latches might unconditionally continue.  Either contradicts the original
+; branch weights.
+;
+; (unroll-partial-unconditional-latch.ll tests partial unrolling cases,
+; including cases where the latch of any iteration, including the final, might
+; unconditionally continue.)
+;
+; For each case, we check:
+; - Iteration frequencies
+;   - When each is multiplied by the number of original loop bodies that execute
+;     within it, they should sum to almost exactly the original loop body
+;     frequency.
+;   - The only exception is an impossibly high or low original frequency (e.g.,
+;     due to bad profile data), for which there exist no new branch weights that
+;     can yield that frequency sum.  In those cases, we expect the maximum or
+;     minimum possible frequency.
+; - CFGs
+;   - We verify which branch weights go with which branches and that we did not
+;     overlook any other branch weights (no extra !prof or branch_weights).
+;   - We also check the number of original loop bodies (represented by a call to
+;     @f) that appear within each unrolled iteration.
+; - Branch weight metadata
+;   - Checking frequencies already checks whether the branch weights have the
+;     expected effect, but we also want to check that we get uniform
+;     probabilities/weights (same !prof) across the unrolled iteration latches
+;     when expected.
+; - llvm.loop.estimated_trip_count:
+;   - There should be none because loops are completely unrolled.
+
+; ------------------------------------------------------------------------------
+; Define LIT substitutions.
+;
+; Before using the following lit substitutions, sed should be called to replace
+; these parameters in %s to produce %t.ll:
+; - @I_0@ is the starting value for the original loop's induction variable.
+; - @MIN@ and @MAX@ are the compile-time known minimum and maximum for the
+;   number of original loop iterations, regardless of @I_0@.
+; - @W@ is the branch weight for the original loop's backedge.  That value plus
+;   1 is the original loop body frequency because the exit branch weight is 1.
+;
+; For verifying that the test code produces the original loop body frequency we
+; expect.
+; DEFINE: %{bf-fc} = opt %t.ll -S -passes='print<block-freq>' 2>&1 | \
+; DEFINE:   FileCheck %s -check-prefixes
+;
+; For checking the unrolled loop.
+; DEFINE: %{ur-bf} = opt %t.ll -S -passes='loop-unroll,print<block-freq>' 2>&1
+; DEFINE: %{fc} = FileCheck %s \
+; DEFINE:     -implicit-check-not='llvm.loop.estimated_trip_count' \
+; DEFINE:     -implicit-check-not='!prof' \
+; DEFINE:     -implicit-check-not='branch_weights' \
+; DEFINE:     -implicit-check-not='call void @f' -check-prefixes
+
+; ------------------------------------------------------------------------------
+; Check 1 max iteration:
+; - Unroll count of >=1 should always produce complete unrolling.
+; - That produces 0 unrolled iteration latches, so there are no branch weights
+;   to compute.
+;
+; Original loop body frequency is 2 (loop weight 1), which is impossibly high.
+;
+;   RUN: sed -e s/@MAX@/1/ -e s/@W@/1/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;   RUN: %{bf-fc} ORIG1210
+;   RUN: %{ur-bf} -unroll-count=1 | %{fc} UR1210
+;   RUN: %{ur-bf} -unroll-count=2 | %{fc} UR1210
+;
+;   The new do.body is less than the old do.body, which is impossibly high.
+;   ORIG1210: - do.body: float = 2.0,
+;   UR1210:   - do.body: float = 1.0,
+;
+;   UR1210: call void @f
+;
+; Original loop body frequency is 1 (loop weight 0).
+;
+;   RUN: sed -e s/@MAX@/1/ -e s/@W@/0/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;   RUN: %{bf-fc} ORIG1110
+;   RUN: %{ur-bf} -unroll-count=1 | %{fc} UR1110
+;   RUN: %{ur-bf} -unroll-count=2 | %{fc} UR1110
+;
+;   The new do.body equals the old do.body.
+;   ORIG1110: - do.body: float = 1.0,
+;   UR1110:   - do.body: float = 1.0,
+;
+;   UR1110: call void @f
+
+; ------------------------------------------------------------------------------
+; Check 2 max iterations:
+; - Unroll count of >=2 should always produce complete unrolling.
+; - That produces <=1 unrolled iteration latch, so the implementation can
+;   compute uniform weights by solving, at worst, a linear equation.
+;
+; Original loop body frequency is 3 (loop weight 2), which is impossibly high.
+;
+;   First use a variable iteration count so that the sole non-final unrolled
+;   iteration's latch remains conditional.
+;
+;     RUN: sed -e s/@MAX@/2/ -e s/@W@/2/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG2310
+;     RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2310
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2310
+;
+;     The sum of the new do.body* cannot reach the old do.body, which is
+;     impossibly high.
+;     ORIG2310: - do.body: float = 3.0,
+;     UR2310:   - do.body: float = 1.0,
+;     FIXME: Should be 1.0:
+;     UR2310:   - do.body.1: float = 0.66667
+;
+;     The sole probability is maximized to try to reach the original frequency.
+;     UR2310: call void @f
+;     UR2310: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR2310: call void @f
+;     UR2310: br label %do.end
+;     FIXME: Should be (0, non-zero):
+;     UR2310: !0 = !{!"branch_weights", i32 1, i32 2}
+;
+;   Now use a constant iteration count so that the sole non-final unrolled
+;   iteration's latch unconditionally continues.
+;
+;     RUN: sed -e s/@MAX@/2/ -e s/@W@/2/ -e s/@MIN@/2/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG2320
+;     RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2320
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2320
+;
+;     The new do.body contains 2 of the original loop's iterations, so multiply
+;     it by 2, which is less than the old do.body, which is impossibly high.
+;     ORIG2320: - do.body: float = 3.0,
+;     UR2320:   - do.body: float = 1.0,
+;
+;     UR2320:     call void @f
+;     UR2320-NOT: br
+;     UR2320:     call void @f
+;     UR2320:     ret void
+;
+; Original loop body frequency is 2 (loop weight 1).
+;
+;   First use a variable iteration count so that the sole non-final unrolled
+;   iteration's latch remains conditional.
+;
+;     RUN: sed -e s/@MAX@/2/ -e s/@W@/1/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG2210
+;     RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2210
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2210
+;
+;     The sum of the new do.body* is the old do.body.
+;     ORIG2210: - do.body: float = 2.0,
+;     UR2210:   - do.body: float = 1.0,
+;     FIXME: Should be 1.0:
+;     UR2210:   - do.body.1: float = 0.5,
+;
+;     UR2210: call void @f
+;     UR2210: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR2210: call void @f
+;     UR2210: br label %do.end
+;     FIXME: Should be (0, non-zero):
+;     UR2210: !0 = !{!"branch_weights", i32 1, i32 1}
+;
+;   Now use a constant iteration count so that the sole non-final unrolled
+;   iteration's latch unconditionally continues.
+;
+;     RUN: sed -e s/@MAX@/2/ -e s/@W@/1/ -e s/@MIN@/2/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG2220
+;     RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2220
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2220
+;
+;     The new do.body contains 2 of the original loop's iterations, so multiply
+;     it by 2 to get the old do.body.
+;     ORIG2220: - do.body: float = 2.0,
+;     UR2220:   - do.body: float = 1.0,
+;
+;     UR2220:     call void @f
+;     UR2220-NOT: br
+;     UR2220:     call void @f
+;     UR2220:     ret void
+;
+; Original loop body frequency is 1 (loop weight 0).
+;
+;   First use a variable iteration count so that the sole non-final unrolled
+;   iteration's latch remains conditional.
+;
+;     RUN: sed -e s/@MAX@/2/ -e s/@W@/0/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG2110
+;     RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2110
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2110
+;
+;     The sum of the new do.body* is approximately the old do.body.
+;     ORIG2110: - do.body: float = 1.0,
+;     UR2110:   - do.body: float = 1.0,
+;     UR2110:   - do.body.1: float = 0.0{{(0000[0-9]*)?}},
+;
+;     UR2110: call void @f
+;     UR2110: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR2110: call void @f
+;     UR2110: br label %do.end
+;     UR2110: !0 = !{!"branch_weights", i32 1, i32 0}
+;
+;   Now use a constant iteration count so that the sole non-final unrolled
+;   iteration's latch unconditionally continues.
+;
+;     RUN: sed -e s/@MAX@/2/ -e s/@W@/0/ -e s/@MIN@/2/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG2120
+;     RUN: %{ur-bf} -unroll-count=2 | %{fc} UR2120
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR2120
+;
+;     The new do.body contains 2 of the original loop's iterations, so multiply
+;     it by 2, which is greater than the old do.body, which is impossibly low.
+;     ORIG2120: - do.body: float = 1.0,
+;     UR2120:   - do.body: float = 1.0,
+;
+;     UR2120:     call void @f
+;     UR2120-NOT: br
+;     UR2120:     call void @f
+;     UR2120:     ret void
+
+; ------------------------------------------------------------------------------
+; Check 3 max iterations:
+; - Unroll count of >=3 should always produce complete unrolling.
+; - That produces <=2 unrolled iteration latches, so the implementation can
+;   compute uniform weights by solving, at worst, a quadratic equation.
+;
+; Original loop body frequency is 4 (loop weight 3), which is impossibly high.
+;
+;   First use a variable iteration count so that all non-final unrolled
+;   iterations' latches remain conditional.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/3/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG3410
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3410
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3410
+;
+;     The sum of the new do.body* cannot reach the old do.body, which is
+;     impossibly high.
+;     ORIG3410: - do.body: float = 4.0,
+;     UR3410:   - do.body: float = 1.0,
+;     FIXME: Should be 1.0:
+;     UR3410:   - do.body.1: float = 0.75,
+;     FIXME: Should be 1.0:
+;     UR3410:   - do.body.2: float = 0.5625,
+;
+;     The probabilities are maximized to try to reach the original frequency.
+;     UR3410: call void @f
+;     UR3410: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR3410: call void @f
+;     UR3410: br i1 %{{.*}}, label %do.end, label %do.body.2, !prof !0
+;     UR3410: call void @f
+;     UR3410: br label %do.end
+;     FIXME: Should be (0, non-zero):
+;     UR3410: !0 = !{!"branch_weights", i32 1, i32 3}
+;
+;   Now use a constant iteration count so that all non-final unrolled
+;   iterations' latches unconditionally continue.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/3/ -e s/@MIN@/3/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG3430
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3430
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3430
+;
+;     The new do.body contains 3 of the original loop's iterations, so multiply
+;     it by 3, which is less than the old do.body, which is impossibly high.
+;     ORIG3430: - do.body: float = 4.0,
+;     UR3430:   - do.body: float = 1.0,
+;
+;     UR3430:     call void @f
+;     UR3430-NOT: br
+;     UR3430:     call void @f
+;     UR3430-NOT: br
+;     UR3430:     call void @f
+;     UR3430:     ret void
+;
+;   Use a constant iteration count but now the loop upper bound computation can
+;   overflow.  When it does, the loop induction variable is greater than it
+;   immediately, so the initial unrolled iteration's latch remains conditional.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/3/ -e s/@MIN@/3/ -e s/@I_0@/%x/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG343x
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR343x
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR343x
+;
+;     The new do.body.1 contains 2 of the original loop's iterations, so
+;     multiply it by 2, and add the new do.body, but that sum is less than the
+;     old do.body, which is impossibly high.
+;     ORIG343x: - do.body: float = 4.0,
+;     UR343x:   - do.body: float = 1.0,
+;     FIXME: Should be 1.0:
+;     UR343x:   - do.body.1: float = 0.75,
+;
+;     The sole probability is maximized to try to reach the original frequency.
+;     UR343x:     call void @f
+;     UR343x:     br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR343x:     call void @f
+;     UR343x-NOT: br
+;     UR343x:     call void @f
+;     UR343x:     ret void
+;     FIXME: Should be (0, non-zero):
+;     UR343x:     !0 = !{!"branch_weights", i32 1, i32 3}
+;
+; Original loop body frequency is 3 (loop weight 2).
+;
+;   First use a variable iteration count so that all non-final unrolled
+;   iterations' latches remain conditional.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/2/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG3310
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3310
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3310
+;
+;     The sum of the new do.body* is the old do.body.
+;     ORIG3310: - do.body: float = 3.0,
+;     UR3310:   - do.body: float = 1.0,
+;     FIXME: Should be 1.0:
+;     UR3310:   - do.body.1: float = 0.66667,
+;     FIXME: Should be 1.0:
+;     UR3310:   - do.body.2: float = 0.44444,
+;
+;     UR3310: call void @f
+;     UR3310: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR3310: call void @f
+;     UR3310: br i1 %{{.*}}, label %do.end, label %do.body.2, !prof !0
+;     UR3310: call void @f
+;     UR3310: br label %do.end
+;     FIXME: Should be (very small, very large):
+;     UR3310: !0 = !{!"branch_weights", i32 1, i32 2}
+;
+;   Now use a constant iteration count so that all non-final unrolled
+;   iterations' latches unconditionally continue.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/2/ -e s/@MIN@/3/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG3330
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3330
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3330
+;
+;     The new do.body contains 3 of the original loop's iterations, so multiply
+;     it by 3 to get the old do.body.
+;     ORIG3330: - do.body: float = 3.0,
+;     UR3330:   - do.body: float = 1.0,
+;
+;     UR3330:     call void @f
+;     UR3330-NOT: br
+;     UR3330:     call void @f
+;     UR3330-NOT: br
+;     UR3330:     call void @f
+;     UR3330:     ret void
+;
+;   Use a constant iteration count but now the loop upper bound computation can
+;   overflow.  When it does, the loop induction variable is greater than it
+;   immediately, so the initial unrolled iteration's latch remains conditional.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/2/ -e s/@MIN@/3/ -e s/@I_0@/%x/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG333x
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR333x
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR333x
+;
+;     The new do.body.1 contains 2 of the original loop's iterations, so
+;     multiply it by 2, and add the new do.body to get the old do.body.
+;     ORIG333x: - do.body: float = 3.0,
+;     UR333x:   - do.body: float = 1.0,
+;     FIXME: Should be 1.0:
+;     UR333x:   - do.body.1: float = 0.66667,
+;
+;     UR333x:     call void @f
+;     UR333x: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR333x:     call void @f
+;     UR333x-NOT: br
+;     UR333x:     call void @f
+;     UR333x:     br label %do.end
+;     FIXME: Should be (very small, very large):
+;     UR333x:     !0 = !{!"branch_weights", i32 1, i32 2}
+;
+; Original loop body frequency is 2 (loop weight 1).  This is our first case
+; where new frequencies and probabilities are not all approximately 1 or 0.
+;
+;   First use a variable iteration count so that all non-final unrolled
+;   iterations' latches remain conditional.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/1/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG3210
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3210
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3210
+;
+;     The sum of the new do.body* is the old do.body.
+;     ORIG3210: - do.body: float = 2.0,
+;     UR3210:   - do.body: float = 1.0,
+;     FIXME: Should sum to 1.0:
+;     UR3210:   - do.body.1: float = 0.5,
+;     UR3210:   - do.body.2: float = 0.25,
+;
+;     UR3210: call void @f
+;     UR3210: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR3210: call void @f
+;     UR3210: br i1 %{{.*}}, label %do.end, label %do.body.2, !prof !0
+;     UR3210: call void @f
+;     UR3210: br label %do.end
+;     UR3210: !0 = !{!"branch_weights", i32 1, i32 1}
+;
+;   Now use a constant iteration count so that all non-final unrolled
+;   iterations' latches unconditionally continue.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/1/ -e s/@MIN@/3/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG3230
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3230
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3230
+;
+;     The new do.body contains 3 of the original loop's iterations, so multiply
+;     it by 3, which is greater than the old do.body, which is impossibly low.
+;     ORIG3230: - do.body: float = 2.0,
+;     UR3230:   - do.body: float = 1.0,
+;
+;     UR3230:     call void @f
+;     UR3230-NOT: br
+;     UR3230:     call void @f
+;     UR3230-NOT: br
+;     UR3230:     call void @f
+;     UR3230:     ret void
+;
+;   Use a constant iteration count but now the loop upper bound computation can
+;   overflow.  When it does, the loop induction variable is greater than it
+;   immediately, so the initial unrolled iteration's latch remains conditional.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/1/ -e s/@MIN@/3/ -e s/@I_0@/%x/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG323x
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR323x
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR323x
+;
+;     The new do.body.1 contains 2 of the original loop's iterations, so
+;     multiply it by 2, and add the new do.body to get the old do.body.
+;     ORIG323x: - do.body: float = 2.0,
+;     UR323x:   - do.body: float = 1.0,
+;     UR323x:   - do.body.1: float = 0.5,
+;
+;     UR323x:     call void @f
+;     UR323x:     br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR323x:     call void @f
+;     UR323x-NOT: br
+;     UR323x:     call void @f
+;     UR323x:     br label %do.end
+;     UR323x:     !0 = !{!"branch_weights", i32 1, i32 1}
+;
+; Original loop body frequency is 1 (loop weight 0).
+;
+;   First use a variable iteration count so that all non-final unrolled
+;   iterations' latches remain conditional.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/0/ -e s/@MIN@/1/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG3110
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3110
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3110
+;
+;     The sum of the new do.body* is approximately the old do.body.
+;     ORIG3110: - do.body: float = 1.0,
+;     UR3110:   - do.body: float = 1.0,
+;     UR3110:   - do.body.1: float = 0.0{{(0000[0-9]*)?}},
+;     UR3110:   - do.body.2: float = 0.0{{(0000[0-9]*)?}},
+;
+;     UR3110: call void @f
+;     UR3110: br i1 %{{.*}}, label %do.end, label %do.body.1, !prof !0
+;     UR3110: call void @f
+;     UR3110: br i1 %{{.*}}, label %do.end, label %do.body.2, !prof !0
+;     UR3110: call void @f
+;     UR3110: br label %do.end
+;     UR3110: !0 = !{!"branch_weights", i32 1, i32 0}
+;
+;   Now use a constant iteration count so that all non-final unrolled
+;   iterations' latches unconditionally continue.
+;
+;     RUN: sed -e s/@MAX@/3/ -e s/@W@/0/ -e s/@MIN@/3/ -e s/@I_0@/0/ %s > %t.ll
+;     RUN: %{bf-fc} ORIG3130
+;     RUN: %{ur-bf} -unroll-count=3 | %{fc} UR3130
+;     RUN: %{ur-bf} -unroll-count=4 | %{fc} UR3130
+;
+;     The new do.body contains 3 of the original loop's iterations, so multiply
+;     it by 3, which is greater than the old do.body, which is impossibly low.
+;     ORIG3130: - do.body: float = 1.0,
+;     UR3130:   - do.body: float = 1.0,
+;
+;     UR3130:     call void @f
+;     UR3130-NOT: br
+;     UR3130:     call void @f
+;     UR3130-NOT: br
...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/191008


More information about the llvm-branch-commits mailing list