[llvm] e8439ec - [OpenMP] Set RequiresFullRuntime false in SPMDization
Giorgis Georgakoudis via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 20 09:54:58 PDT 2021
Author: Giorgis Georgakoudis
Date: 2021-07-20T09:54:51-07:00
New Revision: e8439ec893b823967ef83adfe1e1cd60d6a305ee
URL: https://github.com/llvm/llvm-project/commit/e8439ec893b823967ef83adfe1e1cd60d6a305ee
DIFF: https://github.com/llvm/llvm-project/commit/e8439ec893b823967ef83adfe1e1cd60d6a305ee.diff
LOG: [OpenMP] Set RequiresFullRuntime false in SPMDization
SPMDization in D102307 does not change the RequiresFullRuntime argument of kmpc_target_init/deinit calls. However, the constraints of SPMDization detection for converting a target region to SPMD mode should guarantee that the region does not require full runtime support. Hence, this patch sets RequiresFullRuntime to false for improved execution performance.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D105556
Added:
Modified:
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/custom_state_machines.ll
llvm/test/Transforms/OpenMP/spmdization.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 6862a3d2fcfe..ff66fb3a003c 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -493,7 +493,8 @@ struct KernelInfoState : AbstractState {
BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
/// State to track if we are in SPMD-mode, assumed or know, and why we decided
- /// we cannot be.
+ /// we cannot be. If it is assumed, then RequiresFullRuntime should also be
+ /// false.
BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker;
/// The __kmpc_target_init call in this kernel, if any. If we find more than
@@ -2773,9 +2774,32 @@ struct AAKernelInfoFunction : AAKernelInfo {
return Val;
};
+ Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
+ [&](const IRPosition &IRP, const AbstractAttribute *AA,
+ bool &UsedAssumedInformation) -> Optional<Value *> {
+ // IRP represents the "RequiresFullRuntime" argument of an
+ // __kmpc_target_init or __kmpc_target_deinit call. We will answer this
+ // one with the internal state of the SPMDCompatibilityTracker, so if
+ // generic then true, if SPMD then false.
+ if (!SPMDCompatibilityTracker.isValidState())
+ return nullptr;
+ if (!SPMDCompatibilityTracker.isAtFixpoint()) {
+ if (AA)
+ A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
+ UsedAssumedInformation = true;
+ } else {
+ UsedAssumedInformation = false;
+ }
+ auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
+ !SPMDCompatibilityTracker.isAssumed());
+ return Val;
+ };
+
constexpr const int InitIsSPMDArgNo = 1;
constexpr const int DeinitIsSPMDArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
+ constexpr const int InitRequiresFullRuntimeArgNo = 3;
+ constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
@@ -2785,6 +2809,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
IsSPMDModeSimplifyCB);
+ A.registerSimplificationCallback(
+ IRPosition::callsite_argument(*KernelInitCB,
+ InitRequiresFullRuntimeArgNo),
+ IsGenericModeSimplifyCB);
+ A.registerSimplificationCallback(
+ IRPosition::callsite_argument(*KernelDeinitCB,
+ DeinitRequiresFullRuntimeArgNo),
+ IsGenericModeSimplifyCB);
// Check if we know we are in SPMD-mode already.
ConstantInt *IsSPMDArg =
@@ -2861,6 +2893,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
const int InitIsSPMDArgNo = 1;
const int DeinitIsSPMDArgNo = 1;
const int InitUseStateMachineArgNo = 2;
+ const int InitRequiresFullRuntimeArgNo = 3;
+ const int DeinitRequiresFullRuntimeArgNo = 2;
auto &Ctx = getAnchorValue().getContext();
A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
@@ -2871,6 +2905,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
A.changeUseAfterManifest(
KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
*ConstantInt::getBool(Ctx, 1));
+ A.changeUseAfterManifest(
+ KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
+ *ConstantInt::getBool(Ctx, 0));
+ A.changeUseAfterManifest(
+ KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
+ *ConstantInt::getBool(Ctx, 0));
+
++NumOpenMPTargetRegionKernelsSPMD;
auto Remark = [&](OptimizationRemark OR) {
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index 482ee409c21a..012e25ccf0b0 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -131,7 +131,8 @@ target triple = "nvptx64"
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_2c_389eb_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_l19_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_pure_l72_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86_exec_mode, i8* @__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106_exec_mode], section "llvm.metadata"
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
+; The second to last argument of __kmpc_target_init is set to false to indicate we do not need the generic runtime state machine.
+; The last argument is also set to false due to SPMDization.
; No user code state machine is build because we do not need one.
define weak void @__omp_offloading_2c_389eb_no_state_machine_needed_l14() #0 {
entry:
@@ -1525,14 +1526,14 @@ attributes #10 = { convergent nounwind readonly willreturn }
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef true, i1 noundef false, i1 noundef true)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef true, i1 noundef false, i1 noundef false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; CHECK-NEXT: call void @__omp_outlined__12(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 6ecda643acdb..7dedbc851cfe 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -27,6 +27,7 @@ target triple = "nvptx64"
@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_2c_38c77_sequential_loop_l4_exec_mode], section "llvm.metadata"
; The second argument of __kmpc_target_init and deinit is is set to true to indicate that we can run in SPMD mode.
+; The last argument is set to false since full runtime support is not needed in SPMDization.
; We also adjusted the global __omp_offloading_2c_38c77_sequential_loop_l4_exec_mode to have a zero initializer (which indicates SPMD mode to the runtime).
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
@@ -41,14 +42,14 @@ define weak void @__omp_offloading_2c_38c77_sequential_loop_l4() #0 {
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
More information about the llvm-commits mailing list