[llvm] [OpenMPOpt] Fix incorrect end-of-kernel barrier removal (PR #65670)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 11 12:01:49 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
<details>
<summary>Changes</summary>
Barrier removal in OpenMPOpt normally removes barriers by proving that they are redundant with barriers preceding them. However, it can't do this with the "pseudo-barrier" at the end of kernels because that can't be removed. Instead, it removes the barriers preceding the end of the kernel which that end-of-kernel barrier is redundant with. However, these barriers aren't always redundant with the end-of-kernel barrier when loops are involved, and removing them can lead to incorrect results in compiled code.
This change fixes this by requiring that these pre-end-of-kernel barriers also have the kernel end as a unique successor before removing them.
--
Full diff: https://github.com/llvm/llvm-project/pull/65670.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/IPO/OpenMPOpt.cpp (+21-5)
- (modified) llvm/test/Transforms/OpenMP/barrier_removal.ll (+177-1)
<pre>
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 63493eb78c451a6..8a8d27d9be38b8d 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2621,6 +2621,17 @@ struct AAICVTrackerCallSiteReturned : AAICVTracker {
}
};
+/// Determines if \p BB exits the function unconditionally itself or reaches a
+/// block that does through only unique successors.
+static bool hasFunctionEndAsUniqueSuccessor(const BasicBlock *BB) {
+ if (succ_empty(BB))
+ return true;
+ const BasicBlock *const Successor = BB->getUniqueSuccessor();
+ if (!Successor)
+ return false;
+ return hasFunctionEndAsUniqueSuccessor(Successor);
+}
+
struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
: AAExecutionDomain(IRP, A) {}
@@ -2676,17 +2687,19 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
if (!ED.EncounteredAssumes.empty() && !A.isModulePass())
return;
- // We can remove this barrier, if it is one, or all aligned barriers
- // reaching the kernel end. In the latter case we can transitively work
- // our way back until we find a barrier that guards a side-effect if we
- // are dealing with the kernel end here.
+ // We can remove this barrier, if it is one, or aligned barriers reaching
+ // the kernel end (if CB is nullptr). Aligned barriers reaching the kernel
+ // end should only be removed if the kernel end is their unique successor;
+ // otherwise, they may have side-effects that aren't accounted for in the
+ // kernel end in their other successors. If those barriers have other
+ // barriers reaching them, those can be transitively removed as well as
+ // long as the kernel end is also their unique successor.
if (CB) {
DeletedBarriers.insert(CB);
A.deleteAfterManifest(*CB);
++NumBarriersEliminated;
Changed = ChangeStatus::CHANGED;
} else if (!ED.AlignedBarriers.empty()) {
- NumBarriersEliminated += ED.AlignedBarriers.size();
Changed = ChangeStatus::CHANGED;
SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
ED.AlignedBarriers.end());
@@ -2697,7 +2710,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
continue;
if (LastCB->getFunction() != getAnchorScope())
continue;
+ if (!hasFunctionEndAsUniqueSuccessor(LastCB->getParent()))
+ continue;
if (!DeletedBarriers.count(LastCB)) {
+ ++NumBarriersEliminated;
A.deleteAfterManifest(*LastCB);
continue;
}
diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index e45f746cbf4395d..8aca820afdfa883 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -1065,8 +1065,174 @@ define void @caller_barrier2() "kernel" {
ret void
}
+define void @loop_barrier() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: call void @aligned_barrier()
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ call void @unknown()
+ call void @aligned_barrier()
+ %i.next = add nuw nsw i32 %i, 1
+ %cond = icmp ne i32 %i.next, 128
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @loop_barrier_end_barriers() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: call void @aligned_barrier()
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ call void @unknown()
+ call void @aligned_barrier()
+ %i.next = add nuw nsw i32 %i, 1
+ %cond = icmp ne i32 %i.next, 128
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ call void @aligned_barrier()
+ call void @aligned_barrier()
+ call void @aligned_barrier()
+ call void @aligned_barrier()
+ ret void
+}
+
+define void @loop_barrier_end_barriers_unknown() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: call void @aligned_barrier()
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ call void @unknown()
+ call void @aligned_barrier()
+ %i.next = add nuw nsw i32 %i, 1
+ %cond = icmp ne i32 %i.next, 128
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ call void @aligned_barrier()
+ call void @aligned_barrier()
+ call void @unknown()
+ call void @aligned_barrier()
+ call void @aligned_barrier()
+ ret void
+}
+
+define void @loop_barrier_store() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: store i32 [[I]], ptr @G1, align 4
+; CHECK-NEXT: call void @aligned_barrier()
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ store i32 %i, ptr @G1
+ call void @aligned_barrier()
+ %i.next = add nuw nsw i32 %i, 1
+ %cond = icmp ne i32 %i.next, 128
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @loop_barrier_end_barriers_store() "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: store i32 [[I]], ptr @G1, align 4
+; CHECK-NEXT: call void @aligned_barrier()
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: store i32 [[I_NEXT]], ptr @G1, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ store i32 %i, ptr @G1
+ call void @aligned_barrier()
+ %i.next = add nuw nsw i32 %i, 1
+ %cond = icmp ne i32 %i.next, 128
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ call void @aligned_barrier()
+ call void @aligned_barrier()
+ store i32 %i.next, ptr @G1
+ call void @aligned_barrier()
+ call void @aligned_barrier()
+ ret void
+}
+
!llvm.module.flags = !{!16,!15}
-!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25}
+!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26,!27,!28,!29,!30}
!0 = !{ptr @pos_empty_1, !"kernel", i32 1}
!1 = !{ptr @pos_empty_2, !"kernel", i32 1}
@@ -1079,6 +1245,11 @@ define void @caller_barrier2() "kernel" {
!23 = !{ptr @pos_empty_8, !"kernel", i32 1}
!24 = !{ptr @caller_barrier1, !"kernel", i32 1}
!25 = !{ptr @caller_barrier2, !"kernel", i32 1}
+!26 = !{ptr @loop_barrier, !"kernel", i32 1}
+!27 = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
+!28 = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
+!29 = !{ptr @loop_barrier_store, !"kernel", i32 1}
+!30 = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
!6 = !{ptr @neg_empty_8, !"kernel", i32 1}
!19 = !{ptr @neg_empty_9, !"kernel", i32 1}
!20 = !{ptr @pos_empty_10, !"kernel", i32 1}
@@ -1128,4 +1299,9 @@ define void @caller_barrier2() "kernel" {
; CHECK: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
; CHECK: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
; CHECK: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
+; CHECK: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
+; CHECK: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
+; CHECK: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
+; CHECK: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
+; CHECK: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
;.
</pre>
</details>
https://github.com/llvm/llvm-project/pull/65670
More information about the llvm-commits
mailing list