[llvm] aea2a14 - [LoopUnroll] Prevent LoopFullUnrollPass from performing partial/runtime unrolling

Yashwant Singh via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 12 21:51:46 PDT 2023


Author: Yashwant Singh
Date: 2023-04-13T10:21:24+05:30
New Revision: aea2a147363197f3cef140ebcd27b534fd41877b

URL: https://github.com/llvm/llvm-project/commit/aea2a147363197f3cef140ebcd27b534fd41877b
DIFF: https://github.com/llvm/llvm-project/commit/aea2a147363197f3cef140ebcd27b534fd41877b.diff

LOG: [LoopUnroll] Prevent LoopFullUnrollPass from performing partial/runtime unrolling

LoopFullUnrollPass was performing runtime unrolling in certain cases when
'#pragma unroll' was specified. This patch fixes that by introducing a new
parameter, OnlyFullUnroll, to tryToUnrollLoop() to differentiate between
LoopUnrollPass and LoopFullUnrollPass. Based on the discussion here:
https://discourse.llvm.org/t/loop-unroller-fails-to-unroll-loop/69834

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D148071

Added: 
    llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/test/Transforms/LoopUnroll/revisit.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index deb0dc2d57a86..70b9ff33c5d55 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1124,7 +1124,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
                 const TargetTransformInfo &TTI, AssumptionCache &AC,
                 OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
                 ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
-                bool OnlyWhenForced, bool ForgetAllSCEV,
+                bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
                 std::optional<unsigned> ProvidedCount,
                 std::optional<unsigned> ProvidedThreshold,
                 std::optional<bool> ProvidedAllowPartial,
@@ -1133,6 +1133,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
                 std::optional<bool> ProvidedAllowPeeling,
                 std::optional<bool> ProvidedAllowProfileBasedPeeling,
                 std::optional<unsigned> ProvidedFullUnrollMaxCount) {
+
   LLVM_DEBUG(dbgs() << "Loop Unroll: F["
                     << L->getHeader()->getParent()->getName() << "] Loop %"
                     << L->getHeader()->getName() << "\n");
@@ -1304,6 +1305,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
     return LoopUnrollResult::Unmodified;
   }
 
+  // Do not attempt partial/runtime unrolling in FullLoopUnrolling
+  if (OnlyFullUnroll && !(UP.Count >= MaxTripCount)) {
+    LLVM_DEBUG(
+        dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
+    return LoopUnrollResult::Unmodified;
+  }
+
   // At this point, UP.Runtime indicates that run-time unrolling is allowed.
   // However, we only want to actually perform it if we don't know the trip
   // count and the unroll count doesn't divide the known trip multiple.
@@ -1420,10 +1428,10 @@ class LoopUnroll : public LoopPass {
 
     LoopUnrollResult Result = tryToUnrollLoop(
         L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
-        OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
-        ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
-        ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
-        ProvidedFullUnrollMaxCount);
+        /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
+        ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
+        ProvidedUpperBound, ProvidedAllowPeeling,
+        ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
 
     if (Result == LoopUnrollResult::FullyUnrolled)
       LPM.markLoopAsDeleted(*L);
@@ -1497,8 +1505,8 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
   bool Changed =
       tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
                       /*BFI*/ nullptr, /*PSI*/ nullptr,
-                      /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
-                      ForgetSCEV, /*Count*/ std::nullopt,
+                      /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true,
+                      OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt,
                       /*Threshold*/ std::nullopt, /*AllowPartial*/ false,
                       /*Runtime*/ false, /*UpperBound*/ false,
                       /*AllowPeeling*/ true,
@@ -1623,8 +1631,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
     // flavors of unrolling during construction time (by setting UnrollOpts).
     LoopUnrollResult Result = tryToUnrollLoop(
         &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
-        /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
-        UnrollOpts.ForgetSCEV, /*Count*/ std::nullopt,
+        /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false,
+        UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
+        /*Count*/ std::nullopt,
         /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
         UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
         UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);

diff  --git a/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll
new file mode 100644
index 0000000000000..7f266a754d1bc
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll
@@ -0,0 +1,94 @@
+; RUN: opt -S -passes=loop-unroll --debug-only=loop-unroll < %s 2>&1 | FileCheck %s -check-prefix=LOOP-UNROLL
+; RUN: opt -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --debug-only=loop-unroll < %s 2>&1 | FileCheck %s -check-prefix=LOOP-UNROLL-FULL
+
+; REQUIRES: asserts
+
+%struct.HIP_vector_type = type {  %union.anon }
+%union.anon = type { <2 x float> }
+
+
+; LOOP-UNROLL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body
+; LOOP-UNROLL-NEXT: Loop Size = 9
+; LOOP-UNROLL-NEXT: runtime unrolling with count: 8
+; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; LOOP-UNROLL-NEXT: Trying runtime unrolling on Loop:
+; LOOP-UNROLL-NEXT: Loop at depth 1 containing: %for.body<header><latch><exiting>
+; LOOP-UNROLL-NEXT: Using epilog remainder.
+; LOOP-UNROLL-NEXT: UNROLLING loop %for.body by 8 with run-time trip count!
+
+; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body
+; LOOP-UNROLL-FULL-NEXT: Loop Size = 9
+; LOOP-UNROLL-FULL-NEXT:  runtime unrolling with count: 8
+; LOOP-UNROLL-FULL-NEXT: Not attempting partial/runtime unroll in FullLoopUnroll
+define void @pragma_unroll(ptr %queue, i32 %num_elements) {
+entry:
+  %cmp5 = icmp sgt i32 %num_elements, 0
+  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.06 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %add = add nuw nsw i32 %i.06, 1
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom
+  %idxprom1 = zext i32 %i.06 to i64
+  %arrayidx2 = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom1
+  %0 = load i64, ptr %arrayidx, align 8
+  store i64 %0, ptr %arrayidx2, align 8
+  %exitcond = icmp ne i32 %add, %num_elements
+  br i1 %exitcond, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !1
+}
+
+; LOOP-UNROLL-LABEL: Loop Unroll: F[pragma_unroll_count1] Loop %for.body
+; LOOP-UNROLL-NEXT: Loop Size = 9
+; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; LOOP-UNROLL-NEXT: Trying runtime unrolling on Loop:
+; LOOP-UNROLL-NEXT: Loop at depth 1 containing: %for.body<header><latch><exiting>
+; LOOP-UNROLL-NEXT: Using epilog remainder.
+; LOOP-UNROLL-NEXT: UNROLLING loop %for.body by 5 with run-time trip count!
+
+; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll_count1] Loop %for.body
+; LOOP-UNROLL-FULL-NEXT: Loop Size = 9
+; LOOP-UNROLL-FULL-NEXT: Not attempting partial/runtime unroll in FullLoopUnroll
+define void @pragma_unroll_count1(ptr %queue, i32 %num_elements) {
+entry:
+  %cmp5 = icmp sgt i32 %num_elements, 0
+  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.06 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %add = add nuw nsw i32 %i.06, 1
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom
+  %idxprom1 = zext i32 %i.06 to i64
+  %arrayidx2 = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom1
+  %0 = load i64, ptr %arrayidx, align 8
+  store i64 %0, ptr %arrayidx2, align 8
+  %exitcond = icmp ne i32 %add, %num_elements
+  br i1 %exitcond, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !3
+}
+
+; LOOP-UNROLL: llvm.loop.unroll.disable
+; LOOP-UNROLL-FULL: llvm.loop.unroll.enable
+!0 = !{!"llvm.loop.unroll.enable"}
+!1 = distinct !{!1, !0}
+
+!2 = !{!"llvm.loop.unroll.count", i32 5}
+!3 = distinct !{!3, !2}

diff  --git a/llvm/test/Transforms/LoopUnroll/revisit.ll b/llvm/test/Transforms/LoopUnroll/revisit.ll
index 80a4917b2d7f0..de1f02ac997da 100644
--- a/llvm/test/Transforms/LoopUnroll/revisit.ll
+++ b/llvm/test/Transforms/LoopUnroll/revisit.ll
@@ -1,17 +1,11 @@
 ; This test checks that nested loops are revisited in various scenarios when
 ; unrolling. Note that if we ever start doing outer loop peeling a test case
-; for that should be added here that will look essentially like a hybrid of the
-; current two cases.
+; for that should be added here.
 ;
 ; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
 ; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
 ; RUN:     | FileCheck %s
-;
-; Also run in a special mode that visits children.
-; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \
-; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
-; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN
-
+; 
 ; Basic test is fully unrolled and we revisit the post-unroll new sibling
 ; loops, including the ones that used to be child loops.
 define void @full_unroll(ptr %ptr) {
@@ -76,81 +70,3 @@ l0.latch:
 exit:
   ret void
 }
-
-; Now we test forced runtime partial unrolling with metadata. Here we end up
-; duplicating child loops without changing their structure and so they aren't by
-; default visited, but will be visited with a special parameter.
-define void @partial_unroll(i32 %count, ptr %ptr) {
-; CHECK-LABEL: OptimizationRemarkEmitterAnalysis on partial_unroll
-; CHECK-NOT: LoopFullUnrollPass
-
-entry:
-  br label %l0
-
-l0:
-  %cond.0 = load volatile i1, ptr %ptr
-  br i1 %cond.0, label %l0.0.ph, label %exit
-
-l0.0.ph:
-  br label %l0.0
-
-l0.0:
-  %iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
-  %iv.next = add i32 %iv, 1
-  br label %l0.0.0.ph
-
-l0.0.0.ph:
-  br label %l0.0.0
-
-l0.0.0:
-  %cond.0.0.0 = load volatile i1, ptr %ptr
-  br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
-; CHECK: LoopFullUnrollPass on l0.0.0
-; CHECK-NOT: LoopFullUnrollPass
-
-l0.0.1.ph:
-  br label %l0.0.1
-
-l0.0.1:
-  %cond.0.0.1 = load volatile i1, ptr %ptr
-  br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
-; CHECK: LoopFullUnrollPass on l0.0.1
-; CHECK-NOT: LoopFullUnrollPass
-
-l0.0.latch:
-  %cmp = icmp slt i32 %iv.next, %count
-  br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1
-; CHECK: LoopFullUnrollPass on l0.0
-; CHECK-NOT: LoopFullUnrollPass
-;
-; Partial unrolling occurs which introduces both new child loops and new sibling
-; loops. We only visit the child loops in a special mode, not by default.
-; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.0
-; CHECK-CHILDREN-NOT: LoopFullUnrollPass
-; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.1
-; CHECK-CHILDREN-NOT: LoopFullUnrollPass
-; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.0.1
-; CHECK-CHILDREN-NOT: LoopFullUnrollPass
-; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.1.1
-; CHECK-CHILDREN-NOT: LoopFullUnrollPass
-;
-; When we revisit children, we also revisit the current loop.
-; CHECK-CHILDREN: LoopFullUnrollPass on l0.0
-; CHECK-CHILDREN-NOT: LoopFullUnrollPass
-;
-; Revisit the children of the outer loop that are part of the epilogue.
-;
-; CHECK: LoopFullUnrollPass on l0.0.1.epil
-; CHECK-NOT: LoopFullUnrollPass
-; CHECK: LoopFullUnrollPass on l0.0.0.epil
-; CHECK-NOT: LoopFullUnrollPass
-l0.latch:
-  br label %l0
-; CHECK: LoopFullUnrollPass on l0
-; CHECK-NOT: LoopFullUnrollPass
-
-exit:
-  ret void
-}
-!1 = !{!1, !2}
-!2 = !{!"llvm.loop.unroll.count", i32 2}


        


More information about the llvm-commits mailing list