[llvm] [LoopUnroll] Allow expensive trip count emitting which is estimated to be infinite (PR #79869)

Aleksandr Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 29 09:40:16 PST 2024


https://github.com/aleks-tmb created https://github.com/llvm/llvm-project/pull/79869

We now set the AllowExpensiveTripCount flag if we could get an estimated trip count and that trip count is small.

But if we have an exit branch weights ratio of 1:0, which means a very large trip count, we skip setting the flag because we say that the trip count is not estimated.

This patch adds case handling to allow unroll for such loops.

>From 7ef0291b6f8662ac52cc730958bec963c2570958 Mon Sep 17 00:00:00 2001
From: Aleksandr Popov <apopov at azul.com>
Date: Mon, 29 Jan 2024 02:49:05 -0800
Subject: [PATCH] [LoopUnroll] Allow expensive trip count emitting which is
 estimated as infinite

We now set the AllowExpensiveTripCount flag if we could get an estimated
trip count and that trip count is small.

But if we have an exit branch weights ratio of 1:0, which means a very
large trip count, we skip setting the flag because we say that the trip
count is not estimated.

This patch adds case handling to allow unroll for such loops.
---
 .../include/llvm/Transforms/Utils/LoopUtils.h |  4 +
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |  3 +-
 llvm/lib/Transforms/Utils/LoopUtils.cpp       | 15 ++++
 .../infinite-expensive-trip-count.ll          | 81 +++++++++++++++++++
 4 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 5a1385d01d8e44d..2c40309e2223c59 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -330,6 +330,10 @@ getLoopEstimatedTripCount(Loop *L,
 bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                                unsigned EstimatedLoopInvocationWeight);
 
+/// Return true if the loop's estimated trip count based on branch weight
+/// metadata is infinite, i.e. the loop:exit weight ratio is X:0.
+bool isInfiniteTripCount(Loop *L);
+
 /// Check inner loop (L) backedge count is known to be invariant on all
 /// iterations of its outer loop. If the loop has no parent, this is trivially
 /// true.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7cfeb019af97232..2f3d33121b0a7e0 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1041,7 +1041,8 @@ bool llvm::computeUnrollCount(
         return false;
       else
         UP.AllowExpensiveTripCount = true;
-    }
+    } else if (isInfiniteTripCount(L))
+        UP.AllowExpensiveTripCount = true;
   }
   UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
   if (!UP.Runtime) {
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 59485126b280abf..8d85d0ec8870fca 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -895,6 +895,21 @@ bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
   return true;
 }
 
+bool llvm::isInfiniteTripCount(Loop *L) {
+  if (BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L)) {
+    uint64_t LoopWeight, ExitWeight;
+    if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
+      return false; // Weights could not be extracted: no estimate.
+    // Successor 1 is in the loop, so swap (assumes successor-order weights).
+    if (L->contains(ExitingBranch->getSuccessor(1)))
+      std::swap(LoopWeight, ExitWeight);
+    // A zero exit weight is what this function reports as "infinite".
+    if (!ExitWeight)
+      return true;    
+  }
+  return false; // Helper returned no branch, or the exit weight is non-zero.
+}
+
 bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                               ScalarEvolution &SE) {
   Loop *OuterL = InnerLoop->getParentLoop();
diff --git a/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll b/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll
new file mode 100644
index 000000000000000..c5e0c78a6f9530b
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -unroll-runtime -passes=loop-unroll < %s | FileCheck %s
+
+; Function Attrs: noinline uwtable
+define void @test(i64 %N) !prof !0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[N:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK:       preloop.preheader:
+; CHECK-NEXT:    br label [[PRELOOP:%.*]]
+; CHECK:       preloop:
+; CHECK-NEXT:    br label [[PRELOOP_PSEUDO_EXIT]]
+; CHECK:       preloop.pseudo.exit:
+; CHECK-NEXT:    [[IV_PRELOOP_COPY:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[PRELOOP]] ]
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[UMAX]], 7
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       loop.prol.preheader:
+; CHECK-NEXT:    br label [[LOOP_PROL:%.*]]
+; CHECK:       loop.prol:
+; CHECK-NEXT:    [[IV_PROL:%.*]] = phi i64 [ [[IV_PRELOOP_COPY]], [[LOOP_PROL_PREHEADER]] ], [ [[IV_NEXT_PROL:%.*]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    [[IV_NEXT_PROL]] = add nuw nsw i64 [[IV_PROL]], 1
+; CHECK-NEXT:    [[CMP_PROL:%.*]] = icmp ult i64 [[IV_NEXT_PROL]], [[N]]
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !prof [[PROF2:![0-9]+]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       loop.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_PROL]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    br label [[LOOP_PROL_LOOPEXIT]]
+; CHECK:       loop.prol.loopexit:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_PRELOOP_COPY]], [[PRELOOP_PSEUDO_EXIT]] ], [ [[IV_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[PRELOOP_PSEUDO_EXIT_NEW:%.*]], !prof [[PROF1]]
+; CHECK:       preloop.pseudo.exit.new:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_UNR]], [[PRELOOP_PSEUDO_EXIT_NEW]] ], [ [[IV_NEXT_7:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp ult i64 [[IV_NEXT_7]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP_7]], label [[LOOP]], label [[EXIT_UNR_LCSSA:%.*]], !prof [[PROF5:![0-9]+]]
+; CHECK:       exit.unr-lcssa:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 false, label %preloop, label %preloop.pseudo.exit
+
+preloop:
+  %iv.preloop = phi i64 [ %iv.next.preloop, %preloop ], [ 0, %entry ]
+  %iv.next.preloop = add nuw nsw i64 %iv.preloop, 1
+  br i1 false, label %preloop, label %preloop.pseudo.exit, !prof !1
+
+preloop.pseudo.exit:
+  %iv.preloop.copy = phi i64 [ 0, %entry ], [ %iv.next.preloop, %preloop ]
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.preloop.copy, %preloop.pseudo.exit ], [ %iv.next, %loop ]
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit, !prof !1
+
+exit:
+  ret void
+}
+
+!0 = !{!"function_entry_count", i64 32768}
+!1 = !{!"branch_weights", i32 1, i32 0}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 32768}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 127}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 0}
+;.



More information about the llvm-commits mailing list