[llvm] [OpenMPOpt] Fix incorrect end-of-kernel barrier removal (PR #65670)

Daniel Woodworth via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 7 13:36:58 PDT 2023


https://github.com/dwoodwor-intel created https://github.com/llvm/llvm-project/pull/65670:

Barrier removal in OpenMPOpt normally removes barriers by proving that they are redundant with barriers preceding them. However, it can't do this with the "pseudo-barrier" at the end of kernels because that can't be removed. Instead, it removes the barriers preceding the end of the kernel which that end-of-kernel barrier is redundant with. However, these barriers aren't always redundant with the end-of-kernel barrier when loops are involved, and removing them can lead to incorrect results in compiled code.

This change fixes this by requiring that these pre-end-of-kernel barriers are also only reaching aligned barriers before removing them.

>From b74d06bde6e7b8466a1497bd5e6f4e52eca019c4 Mon Sep 17 00:00:00 2001
From: Daniel Woodworth <daniel.woodworth at intel.com>
Date: Thu, 7 Sep 2023 15:54:02 -0400
Subject: [PATCH] [OpenMPOpt] Fix incorrect end-of-kernel barrier removal

Barrier removal in OpenMPOpt normally removes barriers by proving that
they are redundant with barriers preceding them. However, it can't do
this with the "pseudo-barrier" at the end of kernels because that can't
be removed. Instead, it removes the barriers preceding the end of the
kernel which that end-of-kernel barrier is redundant with. However,
these barriers aren't always redundant with the end-of-kernel barrier
when loops are involved, and removing them can lead to incorrect results
in compiled code.

This change fixes this by requiring that these pre-end-of-kernel
barriers are also only reaching aligned barriers before removing them.
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp         | 16 +++++---
 .../test/Transforms/OpenMP/barrier_removal.ll | 37 ++++++++++++++++++-
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 63493eb78c451a6..05697341eab64fa 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2676,17 +2676,19 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       if (!ED.EncounteredAssumes.empty() && !A.isModulePass())
         return;
 
-      // We can remove this barrier, if it is one, or all aligned barriers
-      // reaching the kernel end. In the latter case we can transitively work
-      // our way back until we find a barrier that guards a side-effect if we
-      // are dealing with the kernel end here.
+      // We can remove this barrier, if it is one, or aligned barriers reaching
+      // the kernel end (if CB is nullptr). Aligned barriers reaching the kernel
+      // end may have other successors besides the kernel end (especially if
+      // they're in loops) with non-local side-effects, so those barriers can
+      // only be removed if they also only reach the kernel end. If those
+      // barriers have other barriers reaching them, those can be transitively
+      // removed as well.
       if (CB) {
         DeletedBarriers.insert(CB);
         A.deleteAfterManifest(*CB);
         ++NumBarriersEliminated;
         Changed = ChangeStatus::CHANGED;
       } else if (!ED.AlignedBarriers.empty()) {
-        NumBarriersEliminated += ED.AlignedBarriers.size();
         Changed = ChangeStatus::CHANGED;
         SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
                                          ED.AlignedBarriers.end());
@@ -2697,7 +2699,11 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
             continue;
           if (LastCB->getFunction() != getAnchorScope())
             continue;
+          const ExecutionDomainTy &PostLastED = CEDMap[{LastCB, POST}];
+          if (!PostLastED.IsReachingAlignedBarrierOnly)
+            continue;
           if (!DeletedBarriers.count(LastCB)) {
+            ++NumBarriersEliminated;
             A.deleteAfterManifest(*LastCB);
             continue;
           }
diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index e45f746cbf4395d..4a7491e9404c0fa 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -736,13 +736,16 @@ define void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p)
 ; MODULE-NEXT:    call void @barrier_then_write0(ptr [[P]])
 ; MODULE-NEXT:    br label [[T0B3:%.*]]
 ; MODULE:       t0b3:
+; MODULE-NEXT:    call void @aligned_barrier()
 ; MODULE-NEXT:    br label [[M3:%.*]]
 ; MODULE:       f03:
 ; MODULE-NEXT:    call void @barrier_then_write0(ptr [[P]])
 ; MODULE-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
 ; MODULE:       t13:
+; MODULE-NEXT:    call void @aligned_barrier()
 ; MODULE-NEXT:    br label [[M3]]
 ; MODULE:       f13:
+; MODULE-NEXT:    call void @aligned_barrier()
 ; MODULE-NEXT:    br label [[M3]]
 ; MODULE:       m3:
 ; MODULE-NEXT:    call void @write_then_barrier0(ptr [[P]])
@@ -1065,8 +1068,38 @@ define void @caller_barrier2() "kernel" {
   ret void
 }
 
+define void @loop_barrier() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  call void @unknown()
+  call void @aligned_barrier()
+  %i.next = add nuw nsw i32 %i, 1
+  %cond = icmp ne i32 %i.next, 128
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
 !llvm.module.flags = !{!16,!15}
-!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25}
+!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26}
 
 !0 = !{ptr @pos_empty_1, !"kernel", i32 1}
 !1 = !{ptr @pos_empty_2, !"kernel", i32 1}
@@ -1079,6 +1112,7 @@ define void @caller_barrier2() "kernel" {
 !23 = !{ptr @pos_empty_8, !"kernel", i32 1}
 !24 = !{ptr @caller_barrier1, !"kernel", i32 1}
 !25 = !{ptr @caller_barrier2, !"kernel", i32 1}
+!26 = !{ptr @loop_barrier, !"kernel", i32 1}
 !6 = !{ptr @neg_empty_8, !"kernel", i32 1}
 !19 = !{ptr @neg_empty_9, !"kernel", i32 1}
 !20 = !{ptr @pos_empty_10, !"kernel", i32 1}
@@ -1128,4 +1162,5 @@ define void @caller_barrier2() "kernel" {
 ; CHECK: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
 ; CHECK: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
 ; CHECK: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
+; CHECK: [[META25:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
 ;.



More information about the llvm-commits mailing list