[llvm] bd31abc - [OpenMPOpt] Refactored "issue" and "wait" declarations for data map runtime call.
Hamilton Tobon Mosquera via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 22 08:51:12 PDT 2020
Author: Hamilton Tobon Mosquera
Date: 2020-09-22T10:50:17-05:00
New Revision: bd31abc1d0f17536fcd85f4dfcc79d37834aac20
URL: https://github.com/llvm/llvm-project/commit/bd31abc1d0f17536fcd85f4dfcc79d37834aac20
DIFF: https://github.com/llvm/llvm-project/commit/bd31abc1d0f17536fcd85f4dfcc79d37834aac20.diff
LOG: [OpenMPOpt] Refactored "issue" and "wait" declarations for data map runtime call.
Refactored __tgt_target_data_begin_mapper_<issue|wait> to receive the handle as an input/output argument.
This was done because of the compiler warning about returning the handle by copy.
Differential Revision: https://reviews.llvm.org/D88029
Added:
Modified:
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 1b39fff3edec..e64ced16b755 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -483,9 +483,9 @@ __OMP_RTL(__tgt_target_data_begin_mapper, false, Void, Int64, Int32, VoidPtrPtr,
VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, Int64, Int32,
VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, AsyncInfo, Int64, Int32,
- VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfo)
+__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, Int64, Int32,
+ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, AsyncInfoPtr)
+__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr)
__OMP_RTL(__tgt_target_data_end_mapper, false, Void, Int64, Int32, VoidPtrPtr,
VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, Int64, Int32,
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 3804a4bb7921..14b7c96d4486 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -812,7 +812,15 @@ struct OpenMPOpt {
/// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
Instruction &WaitMovementPoint) {
+ // Create stack allocated handle (__tgt_async_info) at the beginning of the
+ // function. Used for storing information of the async transfer, allowing to
+ // wait on it later.
auto &IRBuilder = OMPInfoCache.OMPBuilder;
+ auto *F = RuntimeCall.getCaller();
+ Instruction *FirstInst = &(F->getEntryBlock().front());
+ AllocaInst *Handle = new AllocaInst(
+ IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
+
// Add "issue" runtime call declaration:
// declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
// i8**, i8**, i64*, i64*)
@@ -823,9 +831,10 @@ struct OpenMPOpt {
SmallVector<Value *, 8> Args;
for (auto &Arg : RuntimeCall.args())
Args.push_back(Arg.get());
+ Args.push_back(Handle);
CallInst *IssueCallsite =
- CallInst::Create(IssueDecl, Args, "handle", &RuntimeCall);
+ CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
RuntimeCall.eraseFromParent();
// Add "wait" runtime call declaration:
@@ -834,9 +843,10 @@ struct OpenMPOpt {
M, OMPRTL___tgt_target_data_begin_mapper_wait);
// Add call site to WaitDecl.
+ const unsigned DeviceIDArgNum = 0;
Value *WaitParams[2] = {
- IssueCallsite->getArgOperand(0), // device_id.
- IssueCallsite // returned handle.
+ IssueCallsite->getArgOperand(DeviceIDArgNum), // device_id.
+ Handle // handle to wait on.
};
CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
diff --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
index 4f4bf66f7b82..fe618a23866d 100644
--- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
+++ b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
@@ -38,8 +38,11 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
; return random + a;
;}
define dso_local double @heavyComputation1() {
-; CHECK-LABEL: define {{[^@]+}}@heavyComputation1()
+; CHECK-LABEL: define {{[^@]+}}@heavyComputation1() {
; CHECK-NEXT: entry:
+
+; CHECK-NEXT: %handle = alloca %struct.__tgt_async_info, align 8
+
; CHECK-NEXT: %a = alloca double, align 8
; CHECK-NEXT: %.offload_baseptrs = alloca [1 x i8*], align 8
; CHECK-NEXT: %.offload_ptrs = alloca [1 x i8*], align 8
@@ -58,11 +61,11 @@ define dso_local double @heavyComputation1() {
; CHECK-NEXT: %4 = bitcast [1 x i8*]* %.offload_ptrs to double**
; CHECK-NEXT: store double* %a, double** %4, align 8
-; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null)
+; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, %struct.__tgt_async_info* %handle)
; CHECK-NEXT: %5 = bitcast double* %a to i64*
-; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
+; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* %handle)
; CHECK-NEXT: %6 = load i64, i64* %5, align 8
; CHECK-NEXT: %7 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs4, i64 0, i64 0
@@ -157,7 +160,7 @@ entry:
; return random;
;}
define dso_local i32 @heavyComputation2(double* %a, i32 %size) {
-; CHECK-LABEL: define {{[^@]+}}@heavyComputation2(double* %a, i32 %size)
+; CHECK-LABEL: define {{[^@]+}}@heavyComputation2(double* %a, i32 %size) {
; CHECK-NEXT: entry:
; CHECK-NEXT: %size.addr = alloca i32, align 4
; CHECK-NEXT: %.offload_baseptrs = alloca [2 x i8*], align 8
@@ -297,7 +300,7 @@ entry:
; return random;
;}
define dso_local i32 @heavyComputation3(double* noalias %a, i32 %size) {
-; CHECK-LABEL: define {{[^@]+}}@heavyComputation3(double* noalias %a, i32 %size)
+; CHECK-LABEL: define {{[^@]+}}@heavyComputation3(double* noalias %a, i32 %size) {
; CHECK-NEXT: entry:
; CHECK-NEXT: %size.addr = alloca i32, align 4
; CHECK-NEXT: %.offload_baseptrs = alloca [2 x i8*], align 8
@@ -435,8 +438,11 @@ entry:
; return random;
;}
define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) {
-; CHECK-LABEL: define {{[^@]+}}@dataTransferOnly1(double* noalias %a, i32 %size)
+; CHECK-LABEL: define {{[^@]+}}@dataTransferOnly1(double* noalias %a, i32 %size) {
; CHECK-NEXT: entry:
+
+; CHECK-NEXT: %handle = alloca %struct.__tgt_async_info, align 8
+
; CHECK-NEXT: %.offload_baseptrs = alloca [1 x i8*], align 8
; CHECK-NEXT: %.offload_ptrs = alloca [1 x i8*], align 8
; CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
@@ -452,11 +458,11 @@ define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) {
; CHECK-NEXT: %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 0
; CHECK-NEXT: store i64 %0, i64* %5, align 8
-; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null)
+; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, %struct.__tgt_async_info* %handle)
; CHECK-NEXT: %rem = urem i32 %call, %size
-; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
+; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* %handle)
; CHECK-NEXT: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null)
; CHECK-NEXT: ret i32 %rem
@@ -493,5 +499,5 @@ declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8*
declare dso_local i32 @rand(...)
-; CHECK: declare %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64, i32, i8**, i8**, i64*, i64*, i8**)
-; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info)
+; CHECK: declare void @__tgt_target_data_begin_mapper_issue(i64, i32, i8**, i8**, i64*, i64*, i8**, %struct.__tgt_async_info*)
+; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*)
More information about the llvm-commits mailing list