[llvm] bd31abc - [OpenMPOpt] Refactored "issue" and "wait" declarations for data map runtime call.

Hamilton Tobon Mosquera via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 22 08:51:12 PDT 2020


Author: Hamilton Tobon Mosquera
Date: 2020-09-22T10:50:17-05:00
New Revision: bd31abc1d0f17536fcd85f4dfcc79d37834aac20

URL: https://github.com/llvm/llvm-project/commit/bd31abc1d0f17536fcd85f4dfcc79d37834aac20
DIFF: https://github.com/llvm/llvm-project/commit/bd31abc1d0f17536fcd85f4dfcc79d37834aac20.diff

LOG: [OpenMPOpt] Refactored "issue" and "wait" declarations for data map runtime call.

Refactored __tgt_target_data_begin_mapper_<issue|wait> to receive the handle as an input/output argument.
This was done to address the compiler warning about returning the handle by copy.

Differential Revision: https://reviews.llvm.org/D88029

Added: 
    

Modified: 
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 1b39fff3edec..e64ced16b755 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -483,9 +483,9 @@ __OMP_RTL(__tgt_target_data_begin_mapper, false, Void, Int64, Int32, VoidPtrPtr,
           VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
 __OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, Int64, Int32,
           VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, AsyncInfo, Int64, Int32,
-          VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfo)
+__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, Int64, Int32,
+          VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, AsyncInfoPtr)
+__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr)
 __OMP_RTL(__tgt_target_data_end_mapper, false, Void, Int64, Int32, VoidPtrPtr,
           VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
 __OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, Int64, Int32,

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 3804a4bb7921..14b7c96d4486 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -812,7 +812,15 @@ struct OpenMPOpt {
   /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
   bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
                                Instruction &WaitMovementPoint) {
+    // Create stack allocated handle (__tgt_async_info) at the beginning of the
+    // function. Used for storing information of the async transfer, allowing to
+    // wait on it later.
     auto &IRBuilder = OMPInfoCache.OMPBuilder;
+    auto *F = RuntimeCall.getCaller();
+    Instruction *FirstInst = &(F->getEntryBlock().front());
+    AllocaInst *Handle = new AllocaInst(
+        IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
+
     // Add "issue" runtime call declaration:
     // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
     //   i8**, i8**, i64*, i64*)
@@ -823,9 +831,10 @@ struct OpenMPOpt {
     SmallVector<Value *, 8> Args;
     for (auto &Arg : RuntimeCall.args())
       Args.push_back(Arg.get());
+    Args.push_back(Handle);
 
     CallInst *IssueCallsite =
-        CallInst::Create(IssueDecl, Args, "handle", &RuntimeCall);
+        CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
     RuntimeCall.eraseFromParent();
 
     // Add "wait" runtime call declaration:
@@ -834,9 +843,10 @@ struct OpenMPOpt {
         M, OMPRTL___tgt_target_data_begin_mapper_wait);
 
     // Add call site to WaitDecl.
+    const unsigned DeviceIDArgNum = 0;
     Value *WaitParams[2] = {
-        IssueCallsite->getArgOperand(0), // device_id.
-        IssueCallsite // returned handle.
+        IssueCallsite->getArgOperand(DeviceIDArgNum), // device_id.
+        Handle                                        // handle to wait on.
     };
     CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
 

diff  --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
index 4f4bf66f7b82..fe618a23866d 100644
--- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
+++ b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
@@ -38,8 +38,11 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 ;  return random + a;
 ;}
 define dso_local double @heavyComputation1() {
-; CHECK-LABEL: define {{[^@]+}}@heavyComputation1()
+; CHECK-LABEL: define {{[^@]+}}@heavyComputation1() {
 ; CHECK-NEXT:  entry:
+
+; CHECK-NEXT:    %handle = alloca %struct.__tgt_async_info, align 8
+
 ; CHECK-NEXT:    %a = alloca double, align 8
 ; CHECK-NEXT:    %.offload_baseptrs = alloca [1 x i8*], align 8
 ; CHECK-NEXT:    %.offload_ptrs = alloca [1 x i8*], align 8
@@ -58,11 +61,11 @@ define dso_local double @heavyComputation1() {
 ; CHECK-NEXT:    %4 = bitcast [1 x i8*]* %.offload_ptrs to double**
 ; CHECK-NEXT:    store double* %a, double** %4, align 8
 
-; CHECK-NEXT:    %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null)
+; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, %struct.__tgt_async_info* %handle)
 
 ; CHECK-NEXT:    %5 = bitcast double* %a to i64*
 
-; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
+; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* %handle)
 
 ; CHECK-NEXT:    %6 = load i64, i64* %5, align 8
 ; CHECK-NEXT:    %7 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs4, i64 0, i64 0
@@ -157,7 +160,7 @@ entry:
 ;  return random;
 ;}
 define dso_local i32 @heavyComputation2(double* %a, i32 %size) {
-; CHECK-LABEL: define {{[^@]+}}@heavyComputation2(double* %a, i32 %size)
+; CHECK-LABEL: define {{[^@]+}}@heavyComputation2(double* %a, i32 %size) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    %size.addr = alloca i32, align 4
 ; CHECK-NEXT:    %.offload_baseptrs = alloca [2 x i8*], align 8
@@ -297,7 +300,7 @@ entry:
 ;  return random;
 ;}
 define dso_local i32 @heavyComputation3(double* noalias %a, i32 %size) {
-; CHECK-LABEL: define {{[^@]+}}@heavyComputation3(double* noalias %a, i32 %size)
+; CHECK-LABEL: define {{[^@]+}}@heavyComputation3(double* noalias %a, i32 %size) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    %size.addr = alloca i32, align 4
 ; CHECK-NEXT:    %.offload_baseptrs = alloca [2 x i8*], align 8
@@ -435,8 +438,11 @@ entry:
 ;  return random;
 ;}
 define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) {
-; CHECK-LABEL: define {{[^@]+}}@dataTransferOnly1(double* noalias %a, i32 %size)
+; CHECK-LABEL: define {{[^@]+}}@dataTransferOnly1(double* noalias %a, i32 %size) {
 ; CHECK-NEXT:  entry:
+
+; CHECK-NEXT:    %handle = alloca %struct.__tgt_async_info, align 8
+
 ; CHECK-NEXT:    %.offload_baseptrs = alloca [1 x i8*], align 8
 ; CHECK-NEXT:    %.offload_ptrs = alloca [1 x i8*], align 8
 ; CHECK-NEXT:    %.offload_sizes = alloca [1 x i64], align 8
@@ -452,11 +458,11 @@ define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) {
 ; CHECK-NEXT:    %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 0
 ; CHECK-NEXT:    store i64 %0, i64* %5, align 8
 
-; CHECK-NEXT:    %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null)
+; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, %struct.__tgt_async_info* %handle)
 
 ; CHECK-NEXT:    %rem = urem i32 %call, %size
 
-; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
+; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* %handle)
 
 ; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null)
 ; CHECK-NEXT:    ret i32 %rem
@@ -493,5 +499,5 @@ declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8*
 
 declare dso_local i32 @rand(...)
 
-; CHECK: declare %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64, i32, i8**, i8**, i64*, i64*, i8**)
-; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info)
+; CHECK: declare void @__tgt_target_data_begin_mapper_issue(i64, i32, i8**, i8**, i64*, i64*, i8**, %struct.__tgt_async_info*)
+; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*)


        


More information about the llvm-commits mailing list