[llvm-branch-commits] [clang] 2f0a69c - [OpenMP] Fix partial unrolling off-by-one.

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jun 2 22:32:05 PDT 2022


Author: Michael Kruse
Date: 2022-06-02T22:31:52-07:00
New Revision: 2f0a69c32a4c48473c187179a594ac035b244e74

URL: https://github.com/llvm/llvm-project/commit/2f0a69c32a4c48473c187179a594ac035b244e74
DIFF: https://github.com/llvm/llvm-project/commit/2f0a69c32a4c48473c187179a594ac035b244e74.diff

LOG: [OpenMP] Fix partial unrolling off-by-one.

Even though the comment description is ".unroll_inner.iv < NumIterations", the code emitted a BO_LE ('<=') operator for the inner loop that is to be unrolled. This led to one additional copy of the body code in a partially unrolled loop. It only manifests when the unrolled loop is consumed by another loop-associated construct. Fix by using the BO_LT operator instead.

The condition for the outer loop and the corresponding code for tiling correctly used BO_LT already.

Fixes #55236

Added: 
    

Modified: 
    clang/lib/Sema/SemaOpenMP.cpp
    clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c
    clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c
    clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
    clang/test/OpenMP/unroll_codegen_for_partial.cpp
    clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
    clang/test/OpenMP/unroll_codegen_tile_for.cpp
    clang/test/OpenMP/unroll_codegen_unroll_for.cpp
    clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index a500ad4f02209..32e90ced7b29d 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -13314,11 +13314,11 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
   if (!EndOfTile.isUsable())
     return StmtError();
   ExprResult InnerCond1 = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(),
-                                     BO_LE, MakeInnerRef(), EndOfTile.get());
+                                     BO_LT, MakeInnerRef(), EndOfTile.get());
   if (!InnerCond1.isUsable())
     return StmtError();
   ExprResult InnerCond2 =
-      BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LE, MakeInnerRef(),
+      BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeInnerRef(),
                  MakeNumIterations());
   if (!InnerCond2.isUsable())
     return StmtError();

diff  --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c
index a86a5efc9fc58..753fbc5d3341b 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c
@@ -106,12 +106,12 @@
 // CHECK-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
 // CHECK-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4
 // CHECK-NEXT:    %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 4
-// CHECK-NEXT:    %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]]
+// CHECK-NEXT:    %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]]
 // CHECK-NEXT:    br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[LAND_RHS]]:
 // CHECK-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
-// CHECK-NEXT:    %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8
+// CHECK-NEXT:    %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8
 // CHECK-NEXT:    br label %[[LAND_END]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[LAND_END]]:

diff  --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c
index be974d5454bd1..5516835832af4 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c
@@ -121,11 +121,11 @@ void unroll_partial_heuristic_for(int m, float *a, float *b, float *c, float *d,
 // CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTUNROLL_INNER_IV_J]], align 4
 // CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTUNROLLED_IV_J7]], align 4
 // CHECK-NEXT:    [[ADD21:%.*]] = add nsw i32 [[TMP16]], 2
-// CHECK-NEXT:    [[CMP22:%.*]] = icmp sle i32 [[TMP15]], [[ADD21]]
+// CHECK-NEXT:    [[CMP22:%.*]] = icmp slt i32 [[TMP15]], [[ADD21]]
 // CHECK-NEXT:    br i1 [[CMP22]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]]
 // CHECK:       land.rhs:
 // CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTUNROLL_INNER_IV_J]], align 4
-// CHECK-NEXT:    [[CMP24:%.*]] = icmp sle i32 [[TMP17]], 8
+// CHECK-NEXT:    [[CMP24:%.*]] = icmp slt i32 [[TMP17]], 8
 // CHECK-NEXT:    br label [[LAND_END]]
 // CHECK:       land.end:
 // CHECK-NEXT:    [[TMP18:%.*]] = phi i1 [ false, [[FOR_COND]] ], [ [[CMP24]], [[LAND_RHS]] ]

diff  --git a/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
index 447d61a2b494a..7df81c72c097f 100644
--- a/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
+++ b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
@@ -176,14 +176,14 @@ extern "C" void body(...) {}
 // IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
 // IR-NEXT:    %[[TMP40:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J23]], align 4
 // IR-NEXT:    %[[ADD50:.+]] = add i32 %[[TMP40]], 2
-// IR-NEXT:    %[[CMP51:.+]] = icmp ule i32 %[[TMP39]], %[[ADD50]]
+// IR-NEXT:    %[[CMP51:.+]] = icmp ult i32 %[[TMP39]], %[[ADD50]]
 // IR-NEXT:    br i1 %[[CMP51]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_RHS]]:
 // IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
 // IR-NEXT:    %[[TMP42:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_7]], align 4
 // IR-NEXT:    %[[ADD52:.+]] = add i32 %[[TMP42]], 1
-// IR-NEXT:    %[[CMP53:.+]] = icmp ule i32 %[[TMP41]], %[[ADD52]]
+// IR-NEXT:    %[[CMP53:.+]] = icmp ult i32 %[[TMP41]], %[[ADD52]]
 // IR-NEXT:    br label %[[LAND_END]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_END]]:

diff  --git a/clang/test/OpenMP/unroll_codegen_for_partial.cpp b/clang/test/OpenMP/unroll_codegen_for_partial.cpp
index e97f1e6708897..8d9083faf295b 100644
--- a/clang/test/OpenMP/unroll_codegen_for_partial.cpp
+++ b/clang/test/OpenMP/unroll_codegen_for_partial.cpp
@@ -114,14 +114,14 @@ extern "C" void body(...) {}
 // IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
 // IR-NEXT:    %[[ADD17:.+]] = add i32 %[[TMP22]], 2
-// IR-NEXT:    %[[CMP18:.+]] = icmp ule i32 %[[TMP21]], %[[ADD17]]
+// IR-NEXT:    %[[CMP18:.+]] = icmp ult i32 %[[TMP21]], %[[ADD17]]
 // IR-NEXT:    br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_RHS]]:
 // IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
 // IR-NEXT:    %[[ADD19:.+]] = add i32 %[[TMP24]], 1
-// IR-NEXT:    %[[CMP20:.+]] = icmp ule i32 %[[TMP23]], %[[ADD19]]
+// IR-NEXT:    %[[CMP20:.+]] = icmp ult i32 %[[TMP23]], %[[ADD19]]
 // IR-NEXT:    br label %[[LAND_END]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_END]]:

diff  --git a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
index 6a7f4b5b80a3e..2e87d940f3a62 100644
--- a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
+++ b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
@@ -143,14 +143,14 @@ extern "C" void func(int start, int end, int step) {
 // IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
 // IR-NEXT:    %[[ADD17:.+]] = add i32 %[[TMP27]], 7
-// IR-NEXT:    %[[CMP18:.+]] = icmp ule i32 %[[TMP26]], %[[ADD17]]
+// IR-NEXT:    %[[CMP18:.+]] = icmp ult i32 %[[TMP26]], %[[ADD17]]
 // IR-NEXT:    br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_RHS]]:
 // IR-NEXT:    %[[TMP28:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
 // IR-NEXT:    %[[ADD19:.+]] = add i32 %[[TMP29]], 1
-// IR-NEXT:    %[[CMP20:.+]] = icmp ule i32 %[[TMP28]], %[[ADD19]]
+// IR-NEXT:    %[[CMP20:.+]] = icmp ult i32 %[[TMP28]], %[[ADD19]]
 // IR-NEXT:    br label %[[LAND_END]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_END]]:

diff  --git a/clang/test/OpenMP/unroll_codegen_tile_for.cpp b/clang/test/OpenMP/unroll_codegen_tile_for.cpp
index 138d7171fc666..cfb7cb7360f98 100644
--- a/clang/test/OpenMP/unroll_codegen_tile_for.cpp
+++ b/clang/test/OpenMP/unroll_codegen_tile_for.cpp
@@ -162,14 +162,14 @@ extern "C" void body(...) {}
 // IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
 // IR-NEXT:    %[[ADD36:.+]] = add i32 %[[TMP32]], 2
-// IR-NEXT:    %[[CMP37:.+]] = icmp ule i32 %[[TMP31]], %[[ADD36]]
+// IR-NEXT:    %[[CMP37:.+]] = icmp ult i32 %[[TMP31]], %[[ADD36]]
 // IR-NEXT:    br i1 %[[CMP37]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_RHS]]:
 // IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
 // IR-NEXT:    %[[ADD38:.+]] = add i32 %[[TMP34]], 1
-// IR-NEXT:    %[[CMP39:.+]] = icmp ule i32 %[[TMP33]], %[[ADD38]]
+// IR-NEXT:    %[[CMP39:.+]] = icmp ult i32 %[[TMP33]], %[[ADD38]]
 // IR-NEXT:    br label %[[LAND_END]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_END]]:

diff  --git a/clang/test/OpenMP/unroll_codegen_unroll_for.cpp b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp
index 27597adbc0917..ea9707e1757dc 100644
--- a/clang/test/OpenMP/unroll_codegen_unroll_for.cpp
+++ b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp
@@ -129,14 +129,14 @@ extern "C" void body(...) {}
 // IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
 // IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
 // IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP25]], 2
-// IR-NEXT:    %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]]
+// IR-NEXT:    %[[CMP24:.+]] = icmp ult i32 %[[TMP24]], %[[ADD23]]
 // IR-NEXT:    br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_RHS]]:
 // IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
 // IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
 // IR-NEXT:    %[[ADD25:.+]] = add i32 %[[TMP27]], 1
-// IR-NEXT:    %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]]
+// IR-NEXT:    %[[CMP26:.+]] = icmp ult i32 %[[TMP26]], %[[ADD25]]
 // IR-NEXT:    br label %[[LAND_END]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_END]]:
@@ -156,14 +156,14 @@ extern "C" void body(...) {}
 // IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
 // IR-NEXT:    %[[ADD30:.+]] = add i32 %[[TMP32]], 2
-// IR-NEXT:    %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]]
+// IR-NEXT:    %[[CMP31:.+]] = icmp ult i32 %[[TMP31]], %[[ADD30]]
 // IR-NEXT:    br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_RHS32]]:
 // IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
 // IR-NEXT:    %[[ADD33:.+]] = add i32 %[[TMP34]], 1
-// IR-NEXT:    %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]]
+// IR-NEXT:    %[[CMP34:.+]] = icmp ult i32 %[[TMP33]], %[[ADD33]]
 // IR-NEXT:    br label %[[LAND_END35]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_END35]]:

diff  --git a/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp b/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp
index 027bca496eb25..046f61212888b 100644
--- a/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp
+++ b/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp
@@ -129,14 +129,14 @@ extern "C" void body(...) {}
 // IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
 // IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
 // IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP25]], 2
-// IR-NEXT:    %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]]
+// IR-NEXT:    %[[CMP24:.+]] = icmp ult i32 %[[TMP24]], %[[ADD23]]
 // IR-NEXT:    br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_RHS]]:
 // IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
 // IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
 // IR-NEXT:    %[[ADD25:.+]] = add i32 %[[TMP27]], 1
-// IR-NEXT:    %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]]
+// IR-NEXT:    %[[CMP26:.+]] = icmp ult i32 %[[TMP26]], %[[ADD25]]
 // IR-NEXT:    br label %[[LAND_END]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_END]]:
@@ -156,14 +156,14 @@ extern "C" void body(...) {}
 // IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
 // IR-NEXT:    %[[ADD30:.+]] = add i32 %[[TMP32]], 2
-// IR-NEXT:    %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]]
+// IR-NEXT:    %[[CMP31:.+]] = icmp ult i32 %[[TMP31]], %[[ADD30]]
 // IR-NEXT:    br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_RHS32]]:
 // IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
 // IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
 // IR-NEXT:    %[[ADD33:.+]] = add i32 %[[TMP34]], 1
-// IR-NEXT:    %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]]
+// IR-NEXT:    %[[CMP34:.+]] = icmp ult i32 %[[TMP33]], %[[ADD33]]
 // IR-NEXT:    br label %[[LAND_END35]]
 // IR-EMPTY:
 // IR-NEXT:  [[LAND_END35]]:


        


More information about the llvm-branch-commits mailing list