[llvm] 24656e9 - [OpenMPOpt] The kernel end is not necessarily an aligned barrier
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 7 16:39:15 PDT 2023
Author: Johannes Doerfert
Date: 2023-07-07T16:38:34-07:00
New Revision: 24656e995ae9c5893fa32db62863e55c3c70c5e2
URL: https://github.com/llvm/llvm-project/commit/24656e995ae9c5893fa32db62863e55c3c70c5e2
DIFF: https://github.com/llvm/llvm-project/commit/24656e995ae9c5893fa32db62863e55c3c70c5e2.diff
LOG: [OpenMPOpt] The kernel end is not necessarily an aligned barrier
A kernel can be exited in a non-aligned fashion, so we cannot pretend it
always ends in an aligned barrier. Instead, we require an explicit
aligned barrier as we lack a divergence analysis at this point.
Added:
Modified:
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 880e5560e03c69..ba0a72bda0c8c4 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2579,7 +2579,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
SmallPtrSet<CallBase *, 16> DeletedBarriers;
auto HandleAlignedBarrier = [&](CallBase *CB) {
- const ExecutionDomainTy &ED = CEDMap[{CB, PRE}];
+ const ExecutionDomainTy &ED = CB ? CEDMap[{CB, PRE}] : BEDMap[nullptr];
if (!ED.IsReachedFromAlignedBarrierOnly ||
ED.EncounteredNonLocalSideEffect)
return;
@@ -2913,11 +2913,10 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// Helper to deal with an aligned barrier encountered during the forward
// traversal. \p CB is the aligned barrier, \p ED is the execution domain when
// it was encountered.
- auto HandleAlignedBarrier = [&](CallBase *CB, ExecutionDomainTy &ED) {
- if (CB)
- Changed |= AlignedBarriers.insert(CB);
+ auto HandleAlignedBarrier = [&](CallBase &CB, ExecutionDomainTy &ED) {
+ Changed |= AlignedBarriers.insert(&CB);
// First, update the barrier ED kept in the separate CEDMap.
- auto &CallInED = CEDMap[{CB, PRE}];
+ auto &CallInED = CEDMap[{&CB, PRE}];
Changed |= mergeInPredecessor(A, CallInED, ED);
CallInED.IsReachingAlignedBarrierOnly = true;
// Next adjust the ED we use for the traversal.
@@ -2925,9 +2924,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
ED.IsReachedFromAlignedBarrierOnly = true;
// Aligned barrier collection has to come last.
ED.clearAssumeInstAndAlignedBarriers();
- if (CB)
- ED.addAlignedBarrier(A, *CB);
- auto &CallOutED = CEDMap[{CB, POST}];
+ ED.addAlignedBarrier(A, CB);
+ auto &CallOutED = CEDMap[{&CB, POST}];
Changed |= mergeInPredecessor(A, CallOutED, ED);
};
@@ -2946,6 +2944,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// TODO: We use local reasoning since we don't have a divergence analysis
// running as well. We could basically allow uniform branches here.
bool AlignedBarrierLastInBlock = IsEntryBB && IsKernel;
+ bool IsExplicitlyAligned = IsEntryBB && IsKernel;
ExecutionDomainTy ED;
// Propagate "incoming edges" into information about this block.
if (IsEntryBB) {
@@ -3020,14 +3019,16 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
AANoSync::isAlignedBarrier(*CB, AlignedBarrierLastInBlock);
AlignedBarrierLastInBlock &= IsNoSync;
+ IsExplicitlyAligned &= IsNoSync;
// Next we check for calls. Aligned barriers are handled
// explicitly, everything else is kept for the backward traversal and will
// also affect our state.
if (CB) {
if (IsAlignedBarrier) {
- HandleAlignedBarrier(CB, ED);
+ HandleAlignedBarrier(*CB, ED);
AlignedBarrierLastInBlock = true;
+ IsExplicitlyAligned = true;
continue;
}
@@ -3129,14 +3130,16 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
Changed |= mergeInPredecessor(A, InterProceduralED, ED);
auto &FnED = BEDMap[nullptr];
+ if (IsKernel && !IsExplicitlyAligned)
+ FnED.IsReachingAlignedBarrierOnly = false;
+ Changed |= mergeInPredecessor(A, FnED, ED);
+
if (!FnED.IsReachingAlignedBarrierOnly) {
IsEndAndNotReachingAlignedBarriersOnly = true;
SyncInstWorklist.push_back(BB.getTerminator());
auto &BBED = BEDMap[&BB];
Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, false);
}
- if (IsKernel)
- HandleAlignedBarrier(nullptr, ED);
}
ExecutionDomainTy &StoredED = BEDMap[&BB];
diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index f431906f7027a4..b91cf8e6801eb1 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -252,26 +252,32 @@ S:
ret void
}
-; FIXME: We should not replace the load or delete the second store.
+; We should not replace the load or delete the second store.
define void @kernel4b1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
-; TUNIT-NEXT: call void @use1(i32 0) #[[ATTR7]]
+; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
+; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
+; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QB1, align 4
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4b1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
-; CGSCC-NEXT: call void @use1(i32 0) #[[ATTR6]]
+; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
+; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
+; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QB1, align 4
; CGSCC-NEXT: ret void
;
store i32 0, ptr addrspace(3) @QB1
@@ -328,7 +334,7 @@ define void @kernel4b2(i1 %c) "kernel" {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
-; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
+; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: ret void
@@ -338,7 +344,7 @@ define void @kernel4b2(i1 %c) "kernel" {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
-; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
+; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: ret void
More information about the llvm-commits mailing list