[llvm] r326585 - LoopUnroll: respect pragma unroll when AllowRemainder is disabled

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 2 08:22:33 PST 2018


Author: yaxunl
Date: Fri Mar  2 08:22:32 2018
New Revision: 326585

URL: http://llvm.org/viewvc/llvm-project?rev=326585&view=rev
Log:
LoopUnroll: respect pragma unroll when AllowRemainder is disabled

Currently, when AllowRemainder is disabled, the pragma unroll count is not
respected even when there is no remainder. This bug causes a loop to be
fully unrolled in many cases even though the user specifies an unroll
count. It especially affects OpenCL/CUDA, since in many cases a loop
contains convergent instructions and AllowRemainder is currently
disabled for such loops.

Differential Revision: https://reviews.llvm.org/D43826

Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/trunk/test/Transforms/LoopUnroll/convergent.ll
    llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=326585&r1=326584&r2=326585&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Fri Mar  2 08:22:32 2018
@@ -729,7 +729,7 @@ static bool computeUnrollCount(
     UP.Runtime = true;
     UP.AllowExpensiveTripCount = true;
     UP.Force = true;
-    if (UP.AllowRemainder &&
+    if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
         getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
       return true;
   }

Modified: llvm/trunk/test/Transforms/LoopUnroll/convergent.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/convergent.ll?rev=326585&r1=326584&r2=326585&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/convergent.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/convergent.ll Fri Mar  2 08:22:32 2018
@@ -80,4 +80,100 @@ exit:
   ret i32 0
 }
 
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides trip count 4, the loop unroll should respect the pragma.
+; CHECK-LABEL: @pragma_unroll_divisible_trip_count
+define void @pragma_unroll_divisible_trip_count() {
+entry:
+  br label %l3, !llvm.loop !1
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 4
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+  ret void
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides trip multiple 2, the loop unroll should respect the pragma.
+; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple
+define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
+entry:
+  %loop_ctl = mul nsw i32 %n, 2
+  br label %l3, !llvm.loop !1
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %loop_ctl
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 is unknown to divide runtime trip count, the loop is not unrolled
+; since remainder is forbidden for unrolling convergent loop.
+; ToDo: Forbidding remainder for unrolling convergent loop may be relaxed
+; in the future.
+; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count
+define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
+entry:
+  br label %l3, !llvm.loop !1
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 does not divide trip count 5, the loop is not unrolled by 2
+; since remainder is forbidden for unrolling convergent loop. Instead, the
+; loop gets fully unrolled.
+; ToDo: Forbidding remainder for unrolling convergent loop may be relaxed
+; in the future.
+; CHECK-LABEL: @pragma_unroll_indivisible_trip_count
+define i32 @pragma_unroll_indivisible_trip_count() {
+entry:
+  br label %l3, !llvm.loop !1
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 5
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+  ret i32 0
+}
+
 !0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
+!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}
+

Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll?rev=326585&r1=326584&r2=326585&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll Fri Mar  2 08:22:32 2018
@@ -1,5 +1,6 @@
-; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
-; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
+; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
+; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
+; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
 ;
 ; Run loop unrolling twice to verify that loop unrolling metadata is properly
 ; removed and further unrolling is disabled after the pass is run once.
@@ -168,20 +169,24 @@ for.end:
 
 ; #pragma clang loop unroll_count(4)
 ; Loop has a runtime trip count.  Runtime unrolling should occur and loop
-; should be duplicated (original and 4x unrolled).
+; should be duplicated (original and 4x unrolled) if remainder is allowed,
+; otherwise loop should not be unrolled.
 ;
 ; CHECK-LABEL: @runtime_loop_with_count4(
 ; CHECK: for.body
 ; CHECK: store
-; CHECK: store
-; CHECK: store
-; CHECK: store
+; REM: store
+; REM: store
+; REM: store
 ; CHECK-NOT: store
 ; CHECK: br i1
-; CHECK: for.body.epil:
-; CHECK: store
+; REM: for.body.epil:
+; REM: store
+; NOREM-NOT: for.body.epil:
+; NOREM-NOT: store
 ; CHECK-NOT: store
-; CHECK: br i1
+; REM: br i1
+; NOREM-NOT: br i1
 define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
 entry:
   %cmp3 = icmp sgt i32 %b, 0
@@ -284,24 +289,27 @@ for.end:
 
 ; #pragma clang loop unroll(enable)
 ; Loop has a runtime trip count and should be runtime unrolled and duplicated
-; (original and 8x).
+; (original and 8x) if remainder is allowed, otherwise it should not be
+; unrolled.
 ;
 ; CHECK-LABEL: @runtime_loop_with_enable(
 ; CHECK: for.body:
 ; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
 ; CHECK-NOT: store i32
 ; CHECK: br i1
-; CHECK: for.body.epil:
-; CHECK: store
+; REM: for.body.epil:
+; NOREM-NOT: for.body.epil:
+; REM: store
 ; CHECK-NOT: store
-; CHECK: br i1
+; REM: br i1
+; NOREM-NOT: br i1
 define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
 entry:
   %cmp3 = icmp sgt i32 %b, 0
@@ -325,19 +333,22 @@ for.end:
 
 ; #pragma clang loop unroll_count(3)
 ; Loop has a runtime trip count.  Runtime unrolling should occur and loop
-; should be duplicated (original and 3x unrolled).
+; should be duplicated (original and 3x unrolled) if remainder is allowed,
+; otherwise it should not be unrolled.
 ;
 ; CHECK-LABEL: @runtime_loop_with_count3(
 ; CHECK: for.body
 ; CHECK: store
-; CHECK: store
-; CHECK: store
+; REM: store
+; REM: store
 ; CHECK-NOT: store
 ; CHECK: br i1
-; CHECK: for.body.epil:
-; CHECK: store
+; REM: for.body.epil:
+; REM: store
+; NOREM-NOT: for.body.epil:
+; NOREM-NOT: store
 ; CHECK-NOT: store
-; CHECK: br i1
+; REM: br i1
 define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
 entry:
   %cmp3 = icmp sgt i32 %b, 0




More information about the llvm-commits mailing list