[llvm] [LoopUnroll] Allow expensive trip count emitting which is estimated to be infinite (PR #79869)

Aleksandr Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 5 06:21:07 PST 2024


https://github.com/aleks-tmb updated https://github.com/llvm/llvm-project/pull/79869

>From 0143684b48f313a668357d6daa7f9e64b040416c Mon Sep 17 00:00:00 2001
From: Aleksandr Popov <apopov at azul.com>
Date: Mon, 29 Jan 2024 02:49:05 -0800
Subject: [PATCH] [LoopUnroll] Allow expensive trip count emitting which is
 estimated to be infinite

We now set the AllowExpensiveTripCount flag if we could get an estimated
trip count and that trip count is small.

But if the exit branch weights ratio is 1:0, which means a very
large trip count, we skip setting the flag because we say that the trip
count is not estimated.

This patch handles that case so that such loops can be unrolled as well.
---
 .../include/llvm/Transforms/Utils/LoopUtils.h |  4 +
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |  3 +-
 llvm/lib/Transforms/Utils/LoopUtils.cpp       | 15 ++++
 .../infinite-expensive-trip-count.ll          | 81 +++++++++++++++++++
 4 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 5a1385d01d8e4..2c40309e2223c 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -330,6 +330,10 @@ getLoopEstimatedTripCount(Loop *L,
 bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                                unsigned EstimatedLoopInvocationWeight);
 
+// Return true if loop's estimated trip count based on branch weight metadata is
+// infinite, i.e. weight ratio is X:0.
+bool isInfiniteTripCount(Loop *L);
+
 /// Check inner loop (L) backedge count is known to be invariant on all
 /// iterations of its outer loop. If the loop has no parent, this is trivially
 /// true.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7cfeb019af972..763e19afc7037 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1041,7 +1041,8 @@ bool llvm::computeUnrollCount(
         return false;
       else
         UP.AllowExpensiveTripCount = true;
-    }
+    } else if (isInfiniteTripCount(L))
+      UP.AllowExpensiveTripCount = true;
   }
   UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
   if (!UP.Runtime) {
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 59485126b280a..3e1015de8f336 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -895,6 +895,21 @@ bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
   return true;
 }
 
+bool llvm::isInfiniteTripCount(Loop *L) {
+  if (BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L)) {
+    uint64_t LoopWeight, ExitWeight;
+    if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
+      return false;
+
+    if (L->contains(ExitingBranch->getSuccessor(1)))
+      std::swap(LoopWeight, ExitWeight);
+
+    if (!ExitWeight)
+      return true;
+  }
+  return false;
+}
+
 bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                               ScalarEvolution &SE) {
   Loop *OuterL = InnerLoop->getParentLoop();
diff --git a/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll b/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll
new file mode 100644
index 0000000000000..c5e0c78a6f953
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/infinite-expensive-trip-count.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -unroll-runtime -passes=loop-unroll < %s | FileCheck %s
+
+; Function Attrs: noinline uwtable
+define void @test(i64 %N) !prof !0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[N:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK:       preloop.preheader:
+; CHECK-NEXT:    br label [[PRELOOP:%.*]]
+; CHECK:       preloop:
+; CHECK-NEXT:    br label [[PRELOOP_PSEUDO_EXIT]]
+; CHECK:       preloop.pseudo.exit:
+; CHECK-NEXT:    [[IV_PRELOOP_COPY:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[PRELOOP]] ]
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[UMAX]], 7
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       loop.prol.preheader:
+; CHECK-NEXT:    br label [[LOOP_PROL:%.*]]
+; CHECK:       loop.prol:
+; CHECK-NEXT:    [[IV_PROL:%.*]] = phi i64 [ [[IV_PRELOOP_COPY]], [[LOOP_PROL_PREHEADER]] ], [ [[IV_NEXT_PROL:%.*]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    [[IV_NEXT_PROL]] = add nuw nsw i64 [[IV_PROL]], 1
+; CHECK-NEXT:    [[CMP_PROL:%.*]] = icmp ult i64 [[IV_NEXT_PROL]], [[N]]
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !prof [[PROF2:![0-9]+]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       loop.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_PROL]], [[LOOP_PROL]] ]
+; CHECK-NEXT:    br label [[LOOP_PROL_LOOPEXIT]]
+; CHECK:       loop.prol.loopexit:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_PRELOOP_COPY]], [[PRELOOP_PSEUDO_EXIT]] ], [ [[IV_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[PRELOOP_PSEUDO_EXIT_NEW:%.*]], !prof [[PROF1]]
+; CHECK:       preloop.pseudo.exit.new:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_UNR]], [[PRELOOP_PSEUDO_EXIT_NEW]] ], [ [[IV_NEXT_7:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp ult i64 [[IV_NEXT_7]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP_7]], label [[LOOP]], label [[EXIT_UNR_LCSSA:%.*]], !prof [[PROF5:![0-9]+]]
+; CHECK:       exit.unr-lcssa:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 false, label %preloop, label %preloop.pseudo.exit
+
+preloop:
+  %iv.preloop = phi i64 [ %iv.next.preloop, %preloop ], [ 0, %entry ]
+  %iv.next.preloop = add nuw nsw i64 %iv.preloop, 1
+  br i1 false, label %preloop, label %preloop.pseudo.exit, !prof !1
+
+preloop.pseudo.exit:
+  %iv.preloop.copy = phi i64 [ 0, %entry ], [ %iv.next.preloop, %preloop ]
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.preloop.copy, %preloop.pseudo.exit ], [ %iv.next, %loop ]
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit, !prof !1
+
+exit:
+  ret void
+}
+
+!0 = !{!"function_entry_count", i64 32768}
+!1 = !{!"branch_weights", i32 1, i32 0}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 32768}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 127}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 0}
+;.



More information about the llvm-commits mailing list