[llvm] c6457dc - [OpenMP][FIX] Be more deliberate about invalidating the AAKernelInfo state
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 22 22:05:28 PDT 2021
Author: Johannes Doerfert
Date: 2021-09-23T00:04:30-05:00
New Revision: c6457dcae88043cbd07dedc61b602b66b26f1ddf
URL: https://github.com/llvm/llvm-project/commit/c6457dcae88043cbd07dedc61b602b66b26f1ddf
DIFF: https://github.com/llvm/llvm-project/commit/c6457dcae88043cbd07dedc61b602b66b26f1ddf.diff
LOG: [OpenMP][FIX] Be more deliberate about invalidating the AAKernelInfo state
This patch fixes a problem when the AAKernelInfo state was invalidated,
e.g., due to `optnone` for a kernel, but not all parts indicated the
invalidation properly. We further eliminate most full state
invalidations as they should never be necessary.
Differential Revision: https://reviews.llvm.org/D109468
Added:
Modified:
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 8ae0780c590d..0ec36d94f3ae 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -579,7 +579,9 @@ struct KernelInfoState : AbstractState {
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
IsAtFixpoint = true;
+ ReachingKernelEntries.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ ReachedKnownParallelRegions.indicatePessimisticFixpoint();
ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
return ChangeStatus::CHANGED;
}
@@ -626,13 +628,13 @@ struct KernelInfoState : AbstractState {
KernelInfoState operator^=(const KernelInfoState &KIS) {
// Do not merge two different _init and _deinit call sites.
if (KIS.KernelInitCB) {
- if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
- indicatePessimisticFixpoint();
+ if(KernelInitCB && KernelInitCB != KIS.KernelInitCB)
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt assumptions.");
KernelInitCB = KIS.KernelInitCB;
}
if (KIS.KernelDeinitCB) {
- if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
- indicatePessimisticFixpoint();
+ if(KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt assumptions.");
KernelDeinitCB = KIS.KernelDeinitCB;
}
SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
@@ -2828,9 +2830,13 @@ struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
: "") +
std::string(" #PRs: ") +
- std::to_string(ReachedKnownParallelRegions.size()) +
+ (ReachedKnownParallelRegions.isValidState()
+ ? std::to_string(ReachedKnownParallelRegions.size())
+ : "<invalid>") +
", #Unknown PRs: " +
- std::to_string(ReachedUnknownParallelRegions.size()) +
+ (ReachedUnknownParallelRegions.isValidState()
+ ? std::to_string(ReachedUnknownParallelRegions.size())
+ : "<invalid>") +
", #Reaching Kernels: " +
(ReachingKernelEntries.isValidState()
? std::to_string(ReachingKernelEntries.size())
@@ -3036,7 +3042,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// If we can we change the execution mode to SPMD-mode otherwise we build a
// custom state machine.
if (!mayContainParallelRegion() || !changeToSPMDMode(A))
- buildCustomStateMachine(A);
+ return buildCustomStateMachine(A);
return ChangeStatus::CHANGED;
}
@@ -3340,7 +3346,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
ChangeStatus buildCustomStateMachine(Attributor &A) {
// If we have disabled state machine rewrites, don't make a custom one
if (DisableOpenMPOptStateMachineRewrite)
- return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
assert(ReachedKnownParallelRegions.isValidState() &&
"Custom state machine with invalid parallel region states?");
@@ -3683,8 +3689,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
bool UsedAssumedInformationInCheckCallInst = false;
if (!A.checkForAllCallLikeInstructions(
- CheckCallInst, *this, UsedAssumedInformationInCheckCallInst))
+ CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
+ LLVM_DEBUG(dbgs() << TAG << "Failed to visit all call-like instructions!\n";);
return indicatePessimisticFixpoint();
+ }
// If we haven't used any assumed information for the SPMD state we can fix
// it.
@@ -3892,18 +3900,16 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// We do not look into tasks right now, just give up.
SPMDCompatibilityTracker.insert(&CB);
ReachedUnknownParallelRegions.insert(&CB);
- indicatePessimisticFixpoint();
- return;
+ break;
case OMPRTL___kmpc_alloc_shared:
case OMPRTL___kmpc_free_shared:
// Return without setting a fixpoint, to be resolved in updateImpl.
return;
default:
// Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
- // generally.
+ // generally. However, they do not hide parallel regions.
SPMDCompatibilityTracker.insert(&CB);
- indicatePessimisticFixpoint();
- return;
+ break;
}
// All other OpenMP runtime calls will not reach parallel regions so they
// can be safely ignored for now. Since it is a known OpenMP runtime call we
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
index a6cbd20c8eba..9d2014ce0e8d 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
@@ -68,9 +68,10 @@ define weak void @kernel2() #0 {
}
define internal void @helper0() {
-; CHECK-LABEL: define {{[^@]+}}@helper0() {
-; CHECK-NEXT: [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1:[0-9]+]]
-; CHECK-NEXT: store i32 undef, i32* @G, align 4
+; CHECK-LABEL: define {{[^@]+}}@helper0
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]]
+; CHECK-NEXT: store i32 [[THREADLIMIT]], i32* @G, align 4
; CHECK-NEXT: ret void
;
%threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
@@ -81,11 +82,13 @@ define internal void @helper0() {
define internal void @helper1() {
; CHECK-LABEL: define {{[^@]+}}@helper1() {
; CHECK-NEXT: [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]]
-; CHECK-NEXT: unreachable
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[THREADLIMIT]], 666
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]]
; CHECK: t:
-; CHECK-NEXT: unreachable
+; CHECK-NEXT: call void @helper0() #[[ATTR1]]
+; CHECK-NEXT: ret void
; CHECK: f:
-; CHECK-NEXT: unreachable
+; CHECK-NEXT: ret void
;
%threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
%c = icmp eq i32 %threadLimit, 666
@@ -100,7 +103,7 @@ f:
define internal void @helper2() {
; CHECK-LABEL: define {{[^@]+}}@helper2() {
; CHECK-NEXT: [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]]
-; CHECK-NEXT: store i32 undef, i32* @G, align 4
+; CHECK-NEXT: store i32 [[THREADLIMIT]], i32* @G, align 4
; CHECK-NEXT: ret void
;
%threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
More information about the llvm-commits mailing list