[llvm] r264407 - Enable non-power-of-2 #pragma unroll counts.

David L Kreitzer via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 25 07:24:52 PDT 2016


Author: dlkreitz
Date: Fri Mar 25 09:24:52 2016
New Revision: 264407

URL: http://llvm.org/viewvc/llvm-project?rev=264407&view=rev
Log:
Enable non-power-of-2 #pragma unroll counts.

Patch by Evgeny Stupachenko.

Differential Revision: http://reviews.llvm.org/D18202

Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
    llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=264407&r1=264406&r2=264407&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Fri Mar 25 09:24:52 2016
@@ -684,11 +684,6 @@ static bool tryToUnrollLoop(Loop *L, Dom
   }
 
   if (HasPragma) {
-    if (PragmaCount != 0)
-      // If loop has an unroll count pragma mark loop as unrolled to prevent
-      // unrolling beyond that requested by the pragma.
-      SetLoopAlreadyUnrolled(L);
-
     // Emit optimization remarks if we are unable to unroll the loop
     // as directed by a pragma.
     DebugLoc LoopLoc = L->getStartLoc();
@@ -738,6 +733,10 @@ static bool tryToUnrollLoop(Loop *L, Dom
                   TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
     return false;
 
+  // If loop has an unroll count pragma mark loop as unrolled to prevent
+  // unrolling beyond that requested by the pragma.
+  if (HasPragma && PragmaCount != 0)
+    SetLoopAlreadyUnrolled(L);
   return true;
 }
 

Modified: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp?rev=264407&r1=264406&r2=264407&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp Fri Mar 25 09:24:52 2016
@@ -117,10 +117,10 @@ static void ConnectProlog(Loop *L, Value
 
   assert(Count != 0 && "nonsensical Count!");
 
-  // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
-  // (since Count is a power of 2).  This means %xtraiter is (BECount + 1) and
-  // and all of the iterations of this loop were executed by the prologue.  Note
-  // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
+  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
+  // This means %xtraiter is (BECount + 1) and all of the iterations of this
+  // loop were executed by the prologue.  Note that if BECount <u (Count - 1)
+  // then (BECount + 1) cannot unsigned-overflow.
   Value *BrLoopExit =
       B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
   BasicBlock *Exit = L->getUniqueExitBlock();
@@ -319,11 +319,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
       Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
     return false;
 
-  // We only handle cases when the unroll factor is a power of 2.
-  // Count is the loop unroll factor, the number of extra copies added + 1.
-  if (!isPowerOf2_32(Count))
-    return false;
-
   // This constraint lets us deal with an overflowing trip count easily; see the
   // comment on ModVal below.
   if (Log2_32(Count) > BEWidth)
@@ -349,18 +344,33 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
                                           PreHeaderBR);
 
   IRBuilder<> B(PreHeaderBR);
-  Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
-
-  // If ModVal is zero, we know that either
-  //  1. There are no iterations to be run in the prologue loop.
-  // OR
-  //  2. The addition computing TripCount overflowed.
-  //
-  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
-  // number of iterations that remain to be run in the original loop is a
-  // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
-  // explicitly check this above).
-
+  Value *ModVal;
+  // Calculate ModVal = (BECount + 1) % Count.
+  // Note that TripCount is BECount + 1.
+  if (isPowerOf2_32(Count)) {
+    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+    // If ModVal is zero, we know that either
+    //  1. There are no iterations to be run in the prologue loop, OR
+    //  2. The addition computing TripCount overflowed.
+    //
+    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+    // the number of iterations that remain to be run in the original loop is a
+    // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth
+    // (we explicitly check this above).
+  } else {
+    // As (BECount + 1) can potentially unsigned overflow, we compute
+    // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
+    Value *ModValTmp = B.CreateURem(BECount,
+                                    ConstantInt::get(BECount->getType(),
+                                                     Count));
+    Value *ModValAdd = B.CreateAdd(ModValTmp,
+                                   ConstantInt::get(ModValTmp->getType(), 1));
+    // At this point (BECount % Count) + 1 could be equal to Count.
+    // To handle this case we need to take mod by Count one more time.
+    ModVal = B.CreateURem(ModValAdd,
+                          ConstantInt::get(BECount->getType(), Count),
+                          "xtraiter");
+  }
   Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
 
   // Branch to either the extra iterations or the cloned/unrolled loop.

Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll?rev=264407&r1=264406&r2=264407&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll Fri Mar 25 09:24:52 2016
@@ -322,3 +322,40 @@ for.end:
   ret void
 }
 !15 = !{!15, !14}
+
+; #pragma clang loop unroll_count(3)
+; Loop has a runtime trip count.  Runtime unrolling should occur and loop
+; should be duplicated (original and 3x unrolled).
+;
+; CHECK-LABEL: @runtime_loop_with_count3(
+; CHECK: for.body.prol:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
+; CHECK: for.body
+; CHECK: store
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
+define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!16 = !{!16, !17}
+!17 = !{!"llvm.loop.unroll.count", i32 3}




More information about the llvm-commits mailing list