[llvm] r263509 - [LoopUnroll] Respect the convergent attribute.

Justin Lebar via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 14 16:15:34 PDT 2016


Author: jlebar
Date: Mon Mar 14 18:15:34 2016
New Revision: 263509

URL: http://llvm.org/viewvc/llvm-project?rev=263509&view=rev
Log:
[LoopUnroll] Respect the convergent attribute.

Summary:
Specifically, when we perform runtime loop unrolling of a loop that
contains a convergent op, we can only unroll k times, where k divides
the loop trip multiple.

Without this change, we'll happily unroll e.g. the following loop

  for (int i = 0; i < N; ++i) {
    if (i == 0) convergent_op();
    foo();
  }

into

  int i = 0;
  if (N % 2 == 1) {
    convergent_op();
    foo();
    ++i;
  }
  for (; i < N - 1; i += 2) {
    if (i == 0) convergent_op();
    foo();
    foo();
  }.

This is unsafe, because we've just added a control-flow dependency to
the convergent op in the prelude.

In general, runtime unrolling loops that contain convergent ops is safe
only if we don't have emit a prelude, which occurs when the unroll count
divides the trip multiple.

Reviewers: resistor

Subscribers: llvm-commits, mzolotukhin

Differential Revision: http://reviews.llvm.org/D17526

Added:
    llvm/trunk/test/Transforms/LoopUnroll/convergent.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=263509&r1=263508&r2=263509&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Mon Mar 14 18:15:34 2016
@@ -362,7 +362,7 @@ analyzeLoopUnrollCost(const Loop *L, uns
 
 /// ApproximateLoopSize - Approximate the size of the loop.
 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
-                                    bool &NotDuplicatable,
+                                    bool &NotDuplicatable, bool &Convergent,
                                     const TargetTransformInfo &TTI,
                                     AssumptionCache *AC) {
   SmallPtrSet<const Value *, 32> EphValues;
@@ -373,6 +373,7 @@ static unsigned ApproximateLoopSize(cons
     Metrics.analyzeBasicBlock(BB, TTI, EphValues);
   NumCalls = Metrics.NumInlineCandidates;
   NotDuplicatable = Metrics.notDuplicatable;
+  Convergent = Metrics.convergent;
 
   unsigned LoopSize = Metrics.NumInsts;
 
@@ -568,8 +569,9 @@ static bool tryToUnrollLoop(Loop *L, Dom
 
   unsigned NumInlineCandidates;
   bool NotDuplicatable;
-  unsigned LoopSize =
-      ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, TTI, &AC);
+  bool Convergent;
+  unsigned LoopSize = ApproximateLoopSize(
+      L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
   DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
 
   // When computing the unrolled size, note that the conditional branch on the
@@ -623,6 +625,7 @@ static bool tryToUnrollLoop(Loop *L, Dom
   if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
     AllowRuntime = false;
   }
+  bool DecreasedCountDueToConvergence = false;
   if (Unrolling == Partial) {
     bool AllowPartial = PragmaEnableUnroll || UP.Partial;
     if (!AllowPartial && !CountSetExplicitly) {
@@ -643,14 +646,40 @@ static bool tryToUnrollLoop(Loop *L, Dom
                    << "-unroll-runtime not given\n");
       return false;
     }
+
     // Reduce unroll count to be the largest power-of-two factor of
     // the original count which satisfies the threshold limit.
     while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
       Count >>= 1;
       UnrolledSize = (LoopSize-2) * Count + 2;
     }
+
     if (Count > UP.MaxCount)
       Count = UP.MaxCount;
+
+    // If the loop contains a convergent operation, the prelude we'd add
+    // to do the first few instructions before we hit the unrolled loop
+    // is unsafe -- it adds a control-flow dependency to the convergent
+    // operation.  Therefore Count must divide TripMultiple.
+    //
+    // TODO: This is quite conservative.  In practice, convergent_op()
+    // is likely to be called unconditionally in the loop.  In this
+    // case, the program would be ill-formed (on most architectures)
+    // unless n were the same on all threads in a thread group.
+    // Assuming n is the same on all threads, any kind of unrolling is
+    // safe.  But currently llvm's notion of convergence isn't powerful
+    // enough to express this.
+    unsigned OrigCount = Count;
+    while (Convergent && Count != 0 && TripMultiple % Count != 0) {
+      DecreasedCountDueToConvergence = true;
+      Count >>= 1;
+    }
+    if (OrigCount > Count) {
+      DEBUG(dbgs() << "  loop contains a convergent instruction, so unroll "
+                      "count must divide the trip multiple, "
+                   << TripMultiple << ".  Reducing unroll count from "
+                   << OrigCount << " to " << Count << ".\n");
+    }
     DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
   }
 
@@ -665,7 +694,16 @@ static bool tryToUnrollLoop(Loop *L, Dom
     DebugLoc LoopLoc = L->getStartLoc();
     Function *F = Header->getParent();
     LLVMContext &Ctx = F->getContext();
-    if ((PragmaCount > 0) && Count != OriginalCount) {
+    if (PragmaCount > 0 && DecreasedCountDueToConvergence) {
+      emitOptimizationRemarkMissed(
+          Ctx, DEBUG_TYPE, *F, LoopLoc,
+          Twine("Unable to unroll loop the number of times directed by "
+                "unroll_count pragma because the loop contains a convergent "
+                "instruction, and so must have an unroll count that divides "
+                "the loop trip multiple of ") +
+              Twine(TripMultiple) + ".  Unrolling instead " + Twine(Count) +
+              " time(s).");
+    } else if ((PragmaCount > 0) && Count != OriginalCount) {
       emitOptimizationRemarkMissed(
           Ctx, DEBUG_TYPE, *F, LoopLoc,
           "Unable to unroll loop the number of times directed by "

Modified: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp?rev=263509&r1=263508&r2=263509&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp Mon Mar 14 18:15:34 2016
@@ -273,7 +273,23 @@ bool llvm::UnrollLoop(Loop *L, unsigned
   // flag is specified.
   bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
 
-  if (RuntimeTripCount &&
+  // Loops containing convergent instructions must have a count that divides
+  // their TripMultiple.
+  DEBUG(
+    bool HasConvergent = false;
+    for (auto &BB : L->blocks())
+      for (auto &I : *BB)
+        if (auto CS = CallSite(&I))
+          HasConvergent |= CS.isConvergent();
+    assert(
+        !HasConvergent || TripMultiple % Count == 0 &&
+        "Unroll count must divide trip multiple if loop contains a convergent "
+        "operation.");
+  );
+  // Don't output the runtime loop prolog if Count is a multiple of
+  // TripMultiple.  Such a prolog is never needed, and is unsafe if the loop
+  // contains a convergent instruction.
+  if (RuntimeTripCount && TripMultiple % Count != 0 &&
       !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
                                PreserveLCSSA))
     return false;

Added: llvm/trunk/test/Transforms/LoopUnroll/convergent.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/convergent.ll?rev=263509&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/convergent.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/convergent.ll Mon Mar 14 18:15:34 2016
@@ -0,0 +1,83 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s
+
+declare void @f() convergent
+
+; Although this loop contains a convergent instruction, it should be
+; fully unrolled.
+;
+; CHECK-LABEL: @full_unroll(
+define i32 @full_unroll() {
+entry:
+  br label %l3
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() ;convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %exit, label %l3
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction, but it should be partially
+; unrolled.  The unroll count is the largest power of 2 that divides the
+; multiple -- 4, in this case.
+;
+; CHECK-LABEL: @runtime_unroll(
+define i32 @runtime_unroll(i32 %n) {
+entry:
+  %loop_ctl = mul nsw i32 %n, 12
+  br label %l3
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %loop_ctl
+  br i1 %exitcond, label %exit, label %l3
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction, so its partial unroll
+; count must divide its trip multiple.  This overrides its unroll
+; pragma -- we unroll exactly 8 times, even though 16 is requested.
+; CHECK-LABEL: @pragma_unroll
+define i32 @pragma_unroll(i32 %n) {
+entry:
+  %loop_ctl = mul nsw i32 %n, 24
+  br label %l3, !llvm.loop !0
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %loop_ctl
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !0
+
+exit:
+  ret i32 0
+}
+
+!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}




More information about the llvm-commits mailing list