[llvm] [OpenMPOpt] Fix incorrect end-of-kernel barrier removal (PR #65670)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 11 12:01:49 PDT 2023


llvmbot wrote:

@llvm/pr-subscribers-llvm-transforms

<details>
<summary>Changes</summary>

Barrier removal in OpenMPOpt normally removes barriers by proving that they are redundant with barriers preceding them. However, it can't do this with the "pseudo-barrier" at the end of kernels because that can't be removed. Instead, it removes the barriers preceding the end of the kernel which that end-of-kernel barrier is redundant with. However, these barriers aren't always redundant with the end-of-kernel barrier when loops are involved, and removing them can lead to incorrect results in compiled code.

This change fixes this by requiring that these pre-end-of-kernel barriers also have the kernel end as a unique successor before removing them.
--
Full diff: https://github.com/llvm/llvm-project/pull/65670.diff

2 Files Affected:

- (modified) llvm/lib/Transforms/IPO/OpenMPOpt.cpp (+21-5) 
- (modified) llvm/test/Transforms/OpenMP/barrier_removal.ll (+177-1) 


<pre>
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 63493eb78c451a6..8a8d27d9be38b8d 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2621,6 +2621,17 @@ struct AAICVTrackerCallSiteReturned : AAICVTracker {
   }
 };
 
+/// Determines if \p BB exits the function unconditionally itself or reaches a
+/// block that does through only unique successors.
+static bool hasFunctionEndAsUniqueSuccessor(const BasicBlock *BB) {
+  if (succ_empty(BB))
+    return true;
+  const BasicBlock *const Successor = BB->getUniqueSuccessor();
+  if (!Successor)
+    return false;
+  return hasFunctionEndAsUniqueSuccessor(Successor);
+}
+
 struct AAExecutionDomainFunction : public AAExecutionDomain {
   AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
       : AAExecutionDomain(IRP, A) {}
@@ -2676,17 +2687,19 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       if (!ED.EncounteredAssumes.empty() && !A.isModulePass())
         return;
 
-      // We can remove this barrier, if it is one, or all aligned barriers
-      // reaching the kernel end. In the latter case we can transitively work
-      // our way back until we find a barrier that guards a side-effect if we
-      // are dealing with the kernel end here.
+      // We can remove this barrier, if it is one, or aligned barriers reaching
+      // the kernel end (if CB is nullptr). Aligned barriers reaching the kernel
+      // end should only be removed if the kernel end is their unique successor;
+      // otherwise, they may have side-effects that aren't accounted for in the
+      // kernel end in their other successors. If those barriers have other
+      // barriers reaching them, those can be transitively removed as well as
+      // long as the kernel end is also their unique successor.
       if (CB) {
         DeletedBarriers.insert(CB);
         A.deleteAfterManifest(*CB);
         ++NumBarriersEliminated;
         Changed = ChangeStatus::CHANGED;
       } else if (!ED.AlignedBarriers.empty()) {
-        NumBarriersEliminated += ED.AlignedBarriers.size();
         Changed = ChangeStatus::CHANGED;
         SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
                                          ED.AlignedBarriers.end());
@@ -2697,7 +2710,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
             continue;
           if (LastCB->getFunction() != getAnchorScope())
             continue;
+          if (!hasFunctionEndAsUniqueSuccessor(LastCB->getParent()))
+            continue;
           if (!DeletedBarriers.count(LastCB)) {
+            ++NumBarriersEliminated;
             A.deleteAfterManifest(*LastCB);
             continue;
           }
diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index e45f746cbf4395d..8aca820afdfa883 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -1065,8 +1065,174 @@ define void @caller_barrier2() "kernel" {
   ret void
 }
 
+define void @loop_barrier() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  call void @unknown()
+  call void @aligned_barrier()
+  %i.next = add nuw nsw i32 %i, 1
+  %cond = icmp ne i32 %i.next, 128
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @loop_barrier_end_barriers() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  call void @unknown()
+  call void @aligned_barrier()
+  %i.next = add nuw nsw i32 %i, 1
+  %cond = icmp ne i32 %i.next, 128
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  call void @aligned_barrier()
+  call void @aligned_barrier()
+  call void @aligned_barrier()
+  call void @aligned_barrier()
+  ret void
+}
+
+define void @loop_barrier_end_barriers_unknown() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  call void @unknown()
+  call void @aligned_barrier()
+  %i.next = add nuw nsw i32 %i, 1
+  %cond = icmp ne i32 %i.next, 128
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  call void @aligned_barrier()
+  call void @aligned_barrier()
+  call void @unknown()
+  call void @aligned_barrier()
+  call void @aligned_barrier()
+  ret void
+}
+
+define void @loop_barrier_store() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    store i32 [[I]], ptr @G1, align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  store i32 %i, ptr @G1
+  call void @aligned_barrier()
+  %i.next = add nuw nsw i32 %i, 1
+  %cond = icmp ne i32 %i.next, 128
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @loop_barrier_end_barriers_store() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    store i32 [[I]], ptr @G1, align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i32 [[I_NEXT]], ptr @G1, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  store i32 %i, ptr @G1
+  call void @aligned_barrier()
+  %i.next = add nuw nsw i32 %i, 1
+  %cond = icmp ne i32 %i.next, 128
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  call void @aligned_barrier()
+  call void @aligned_barrier()
+  store i32 %i.next, ptr @G1
+  call void @aligned_barrier()
+  call void @aligned_barrier()
+  ret void
+}
+
 !llvm.module.flags = !{!16,!15}
-!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25}
+!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26,!27,!28,!29,!30}
 
 !0 = !{ptr @pos_empty_1, !"kernel", i32 1}
 !1 = !{ptr @pos_empty_2, !"kernel", i32 1}
@@ -1079,6 +1245,11 @@ define void @caller_barrier2() "kernel" {
 !23 = !{ptr @pos_empty_8, !"kernel", i32 1}
 !24 = !{ptr @caller_barrier1, !"kernel", i32 1}
 !25 = !{ptr @caller_barrier2, !"kernel", i32 1}
+!26 = !{ptr @loop_barrier, !"kernel", i32 1}
+!27 = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
+!28 = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
+!29 = !{ptr @loop_barrier_store, !"kernel", i32 1}
+!30 = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
 !6 = !{ptr @neg_empty_8, !"kernel", i32 1}
 !19 = !{ptr @neg_empty_9, !"kernel", i32 1}
 !20 = !{ptr @pos_empty_10, !"kernel", i32 1}
@@ -1128,4 +1299,9 @@ define void @caller_barrier2() "kernel" {
 ; CHECK: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
 ; CHECK: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
 ; CHECK: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
+; CHECK: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
+; CHECK: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
+; CHECK: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
+; CHECK: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
+; CHECK: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
 ;.
</pre>

</details>

https://github.com/llvm/llvm-project/pull/65670


More information about the llvm-commits mailing list