[llvm] [LoopUnroll] Allow expensive trip count emitting which is estimated to be infinite (PR #79869)

Aleksandr Popov via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 6 03:05:28 PST 2024


https://github.com/aleks-tmb updated https://github.com/llvm/llvm-project/pull/79869

>From f15fc1dfb7dfa845ac9a21fff067172678f0e14b Mon Sep 17 00:00:00 2001
From: Aleksandr Popov <apopov at azul.com>
Date: Mon, 29 Jan 2024 02:49:05 -0800
Subject: [PATCH] [LoopUnroll] Allow expensive trip count emitting which is
 estimated to be infinite

We now set the AllowExpensiveTripCount flag if we could get an estimated
trip count and that trip count is small.

But if the exit branch weights ratio is 1:0, which implies a very large
trip count, we skip setting the flag because the trip count is treated
as not estimated.

This patch adds handling for that case to allow unrolling of such loops.
---
 .../include/llvm/Transforms/Utils/LoopUtils.h |  4 +
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |  3 +-
 llvm/lib/Transforms/Utils/LoopUtils.cpp       | 15 ++++
 .../infinite-expensive-trip-count.ll          | 81 +++++++++++++++++++
 4 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 5a1385d01d8e44..2c40309e2223c5 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -330,6 +330,10 @@ getLoopEstimatedTripCount(Loop *L,
 bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                                unsigned EstimatedLoopInvocationWeight);
 
+/// Return true if the loop's estimated trip count, based on branch weight
+/// metadata, is infinite, i.e. the loop:exit weight ratio is X:0.
+bool isInfiniteTripCount(Loop *L);
+
 /// Check inner loop (L) backedge count is known to be invariant on all
 /// iterations of its outer loop. If the loop has no parent, this is trivially
 /// true.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7dfe4aca6fe400..f4271d6fb535bb 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1054,7 +1054,8 @@ bool llvm::computeUnrollCount(
         return false;
       else
         UP.AllowExpensiveTripCount = true;
-    }
+    } else if (isInfiniteTripCount(L))
+      UP.AllowExpensiveTripCount = true;
   }
   UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
   if (!UP.Runtime) {
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 59485126b280ab..3e1015de8f3360 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -895,6 +895,21 @@ bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
   return true;
 }
 
+bool llvm::isInfiniteTripCount(Loop *L) {
+  if (BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L)) {
+    uint64_t LoopWeight, ExitWeight;
+    if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
+      return false;
+
+    if (L->contains(ExitingBranch->getSuccessor(1)))
+      std::swap(LoopWeight, ExitWeight);
+
+    if (!ExitWeight)
+      return true;
+  }
+  return false;
+}
+
 bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                               ScalarEvolution &SE) {
   Loop *OuterL = InnerLoop->getParentLoop();
diff --git a/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll b/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll
new file mode 100644
index 00000000000000..c5e0c78a6f9530
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -unroll-runtime -passes=loop-unroll < %s | FileCheck %s
+
+; Function Attrs: noinline uwtable
+define void @test(i64 %N) !prof !0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[N:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK:       preloop.preheader:
+; CHECK-NEXT:    br label [[PRELOOP:%.*]]
+; CHECK:       preloop:
+; CHECK-NEXT:    br label [[PRELOOP_PSEUDO_EXIT]]
+; CHECK:       preloop.pseudo.exit:
+; CHECK-NEXT:    [[IV_PRELOOP_COPY:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[PRELOOP]] ]
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[UMAX]], 7
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       loop.prol.preheader:
+; CHECK-NEXT:    br label [[LOOP_PROL:%.*]]
+; CHECK:       loop.prol:
+; CHECK-NEXT:    [[IV_PROL:%.*]] = phi i64 [ [[IV_PRELOOP_COPY]], [[LOOP_PROL_PREHEADER]] ], [ [[IV_NEXT_PROL:%.*]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    [[IV_NEXT_PROL]] = add nuw nsw i64 [[IV_PROL]], 1
+; CHECK-NEXT:    [[CMP_PROL:%.*]] = icmp ult i64 [[IV_NEXT_PROL]], [[N]]
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !prof [[PROF2:![0-9]+]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       loop.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_PROL]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    br label [[LOOP_PROL_LOOPEXIT]]
+; CHECK:       loop.prol.loopexit:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_PRELOOP_COPY]], [[PRELOOP_PSEUDO_EXIT]] ], [ [[IV_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[PRELOOP_PSEUDO_EXIT_NEW:%.*]], !prof [[PROF1]]
+; CHECK:       preloop.pseudo.exit.new:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_UNR]], [[PRELOOP_PSEUDO_EXIT_NEW]] ], [ [[IV_NEXT_7:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp ult i64 [[IV_NEXT_7]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP_7]], label [[LOOP]], label [[EXIT_UNR_LCSSA:%.*]], !prof [[PROF5:![0-9]+]]
+; CHECK:       exit.unr-lcssa:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 false, label %preloop, label %preloop.pseudo.exit
+
+preloop:
+  %iv.preloop = phi i64 [ %iv.next.preloop, %preloop ], [ 0, %entry ]
+  %iv.next.preloop = add nuw nsw i64 %iv.preloop, 1
+  br i1 false, label %preloop, label %preloop.pseudo.exit, !prof !1
+
+preloop.pseudo.exit:
+  %iv.preloop.copy = phi i64 [ 0, %entry ], [ %iv.next.preloop, %preloop ]
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.preloop.copy, %preloop.pseudo.exit ], [ %iv.next, %loop ]
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit, !prof !1
+
+exit:
+  ret void
+}
+
+!0 = !{!"function_entry_count", i64 32768}
+!1 = !{!"branch_weights", i32 1, i32 0}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 32768}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 127}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 0}
+;.



More information about the llvm-commits mailing list