[llvm] r264407 - Enable non-power-of-2 #pragma unroll counts.
David L Kreitzer via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 25 07:24:52 PDT 2016
Author: dlkreitz
Date: Fri Mar 25 09:24:52 2016
New Revision: 264407
URL: http://llvm.org/viewvc/llvm-project?rev=264407&view=rev
Log:
Enable non-power-of-2 #pragma unroll counts.
Patch by Evgeny Stupachenko.
Differential Revision: http://reviews.llvm.org/D18202
Modified:
llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=264407&r1=264406&r2=264407&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Fri Mar 25 09:24:52 2016
@@ -684,11 +684,6 @@ static bool tryToUnrollLoop(Loop *L, Dom
}
if (HasPragma) {
- if (PragmaCount != 0)
- // If loop has an unroll count pragma mark loop as unrolled to prevent
- // unrolling beyond that requested by the pragma.
- SetLoopAlreadyUnrolled(L);
-
// Emit optimization remarks if we are unable to unroll the loop
// as directed by a pragma.
DebugLoc LoopLoc = L->getStartLoc();
@@ -738,6 +733,10 @@ static bool tryToUnrollLoop(Loop *L, Dom
TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
return false;
+ // If loop has an unroll count pragma mark loop as unrolled to prevent
+ // unrolling beyond that requested by the pragma.
+ if (HasPragma && PragmaCount != 0)
+ SetLoopAlreadyUnrolled(L);
return true;
}
Modified: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp?rev=264407&r1=264406&r2=264407&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp Fri Mar 25 09:24:52 2016
@@ -117,10 +117,10 @@ static void ConnectProlog(Loop *L, Value
assert(Count != 0 && "nonsensical Count!");
- // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
- // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and
- // and all of the iterations of this loop were executed by the prologue. Note
- // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
+ // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
+ // This means %xtraiter is (BECount + 1) and all of the iterations of this
+ // loop were executed by the prologue. Note that if BECount <u (Count - 1)
+ // then (BECount + 1) cannot unsigned-overflow.
Value *BrLoopExit =
B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
BasicBlock *Exit = L->getUniqueExitBlock();
@@ -319,11 +319,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
return false;
- // We only handle cases when the unroll factor is a power of 2.
- // Count is the loop unroll factor, the number of extra copies added + 1.
- if (!isPowerOf2_32(Count))
- return false;
-
// This constraint lets us deal with an overflowing trip count easily; see the
// comment on ModVal below.
if (Log2_32(Count) > BEWidth)
@@ -349,18 +344,33 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
PreHeaderBR);
IRBuilder<> B(PreHeaderBR);
- Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
-
- // If ModVal is zero, we know that either
- // 1. There are no iterations to be run in the prologue loop.
- // OR
- // 2. The addition computing TripCount overflowed.
- //
- // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
- // number of iterations that remain to be run in the original loop is a
- // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
- // explicitly check this above).
-
+ Value *ModVal;
+ // Calculate ModVal = (BECount + 1) % Count.
+ // Note that TripCount is BECount + 1.
+ if (isPowerOf2_32(Count)) {
+ ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+ // 1. There are no iterations to be run in the prologue loop.
+ // OR
+ // 2. The addition computing TripCount overflowed.
+ //
+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+ // the number of iterations that remain to be run in the original loop is a
+ // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
+ // explicitly check this above).
+ } else {
+ // As (BECount + 1) can potentially unsigned overflow we count
+ // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
+ Value *ModValTmp = B.CreateURem(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count));
+ Value *ModValAdd = B.CreateAdd(ModValTmp,
+ ConstantInt::get(ModValTmp->getType(), 1));
+ // At that point (BECount % Count) + 1 could be equal to Count.
+ // To handle this case we need to take mod by Count one more time.
+ ModVal = B.CreateURem(ModValAdd,
+ ConstantInt::get(BECount->getType(), Count),
+ "xtraiter");
+ }
Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
// Branch to either the extra iterations or the cloned/unrolled loop.
Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll?rev=264407&r1=264406&r2=264407&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll Fri Mar 25 09:24:52 2016
@@ -322,3 +322,40 @@ for.end:
ret void
}
!15 = !{!15, !14}
+
+; #pragma clang loop unroll_count(3)
+; Loop has a runtime trip count. Runtime unrolling should occur and loop
+; should be duplicated (original and 3x unrolled).
+;
+; CHECK-LABEL: @runtime_loop_with_count3(
+; CHECK: for.body.prol:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
+; CHECK: for.body
+; CHECK: store
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
+define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
+entry:
+ %cmp3 = icmp sgt i32 %b, 0
+ br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %b
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+!16 = !{!16, !17}
+!17 = !{!"llvm.loop.unroll.count", i32 3}
More information about the llvm-commits
mailing list