[llvm] r326585 - LoopUnroll: respect pragma unroll when AllowRemainder is disabled
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 2 08:22:33 PST 2018
Author: yaxunl
Date: Fri Mar 2 08:22:32 2018
New Revision: 326585
URL: http://llvm.org/viewvc/llvm-project?rev=326585&view=rev
Log:
LoopUnroll: respect pragma unroll when AllowRemainder is disabled
Currently, when AllowRemainder is disabled, the pragma unroll count is not
respected even when there would be no remainder. This bug causes a loop to be
fully unrolled in many cases even though the user specifies an unroll
count. It especially affects OpenCL/CUDA, since in many cases a loop
contains convergent instructions and AllowRemainder is currently
disabled for such loops.
Differential Revision: https://reviews.llvm.org/D43826
Modified:
llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/trunk/test/Transforms/LoopUnroll/convergent.ll
llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=326585&r1=326584&r2=326585&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Fri Mar 2 08:22:32 2018
@@ -729,7 +729,7 @@ static bool computeUnrollCount(
UP.Runtime = true;
UP.AllowExpensiveTripCount = true;
UP.Force = true;
- if (UP.AllowRemainder &&
+ if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return true;
}
Modified: llvm/trunk/test/Transforms/LoopUnroll/convergent.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/convergent.ll?rev=326585&r1=326584&r2=326585&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/convergent.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/convergent.ll Fri Mar 2 08:22:32 2018
@@ -80,4 +80,100 @@ exit:
ret i32 0
}
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides trip count 4, the loop unroll should respect the pragma.
+; CHECK-LABEL: @pragma_unroll_divisible_trip_count
+define void @pragma_unroll_divisible_trip_count() {
+entry:
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() convergent
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret void
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides trip multiple 2, the loop unroll should respect the pragma.
+; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple
+define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
+entry:
+ %loop_ctl = mul nsw i32 %n, 2
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() convergent
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 is unknown to divide runtime trip count, the loop is not unrolled
+; since remainder is forbidden for unrolling convergent loop.
+; ToDo: Forbidding remainder for unrolling convergent loop may be relaxed
+; in the future.
+; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count
+define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
+entry:
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() convergent
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 does not divide trip count 5, the loop is not unrolled by 2
+; since remainder is forbidden for unrolling convergent loop. Instead, the
+; loop gets fully unrolled.
+; ToDo: Forbidding remainder for unrolling convergent loop may be relaxed
+; in the future.
+; CHECK-LABEL: @pragma_unroll_indivisible_trip_count
+define i32 @pragma_unroll_indivisible_trip_count() {
+entry:
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() convergent
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 5
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
+!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}
+
Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll?rev=326585&r1=326584&r2=326585&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll Fri Mar 2 08:22:32 2018
@@ -1,5 +1,6 @@
-; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
-; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
+; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
+; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
+; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.
@@ -168,20 +169,24 @@ for.end:
; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
-; should be duplicated (original and 4x unrolled).
+; should be duplicated (original and 4x unrolled) if remainder is allowed,
+; otherwise loop should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body
; CHECK: store
-; CHECK: store
-; CHECK: store
-; CHECK: store
+; REM: store
+; REM: store
+; REM: store
; CHECK-NOT: store
; CHECK: br i1
-; CHECK: for.body.epil:
-; CHECK: store
+; REM: for.body.epil:
+; REM: store
+; NOREM-NOT: for.body.epil:
+; NOREM-NOT: store
; CHECK-NOT: store
-; CHECK: br i1
+; REM: br i1
+; NOREM-NOT: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@@ -284,24 +289,27 @@ for.end:
; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
-; (original and 8x).
+; (original and 8x) if remainder is allowed, otherwise it should not be
+; unrolled.
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body:
; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
; CHECK-NOT: store i32
; CHECK: br i1
-; CHECK: for.body.epil:
-; CHECK: store
+; REM: for.body.epil:
+; NOREM-NOT: for.body.epil:
+; REM: store
; CHECK-NOT: store
-; CHECK: br i1
+; REM: br i1
+; NOREM-NOT: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@@ -325,19 +333,22 @@ for.end:
; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
-; should be duplicated (original and 3x unrolled).
+; should be duplicated (original and 3x unrolled) if remainder is allowed,
+; otherwise it should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body
; CHECK: store
-; CHECK: store
-; CHECK: store
+; REM: store
+; REM: store
; CHECK-NOT: store
; CHECK: br i1
-; CHECK: for.body.epil:
-; CHECK: store
+; REM: for.body.epil:
+; REM: store
+; NOREM-NOT: for.body.epil:
+; NOREM-NOT: store
; CHECK-NOT: store
-; CHECK: br i1
+; REM: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
More information about the llvm-commits
mailing list