[Openmp-commits] [openmp] db89414 - [libomptarget][nfc] Move grid size computation

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Thu Jul 1 04:53:20 PDT 2021


Author: Jon Chesterfield
Date: 2021-07-01T12:53:04+01:00
New Revision: db89414da4eae1c4cde63b8a4b0c762bbebf53b7

URL: https://github.com/llvm/llvm-project/commit/db89414da4eae1c4cde63b8a4b0c762bbebf53b7
DIFF: https://github.com/llvm/llvm-project/commit/db89414da4eae1c4cde63b8a4b0c762bbebf53b7.diff

LOG: [libomptarget][nfc] Move grid size computation

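Change getLaunchVals to return the integers used directly for the kernel
launch: WorkgroupSize (formerly threadsPerGroup) and GridSize (computed as
threadsPerGroup * num_groups), instead of threadsPerGroup and num_groups.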

Reviewed By: pdhaliwal

Differential Revision: https://reviews.llvm.org/D105237

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index b04745529652..8f40778083cf 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1886,8 +1886,8 @@ int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) {
 //         EnvTeamLimit, EnvNumTeams, num_teams, thread_limit,
 //         loop_tripcount.
 struct launchVals {
-  int threadsPerGroup;
-  int num_groups;
+  int WorkgroupSize;
+  int GridSize;
 };
 
 launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
@@ -2031,8 +2031,8 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
      threadsPerGroup);
 
   launchVals res;
-  res.threadsPerGroup = threadsPerGroup;
-  res.num_groups = num_groups;
+  res.WorkgroupSize = threadsPerGroup;
+  res.GridSize = threadsPerGroup * num_groups;
   return res;
 }
 
@@ -2118,10 +2118,11 @@ int32_t __tgt_rtl_run_target_team_region_locked(
                     thread_limit,   // From run_region arg
                     loop_tripcount, // From run_region arg
                     DeviceInfo.NumTeams[KernelInfo->device_id]);
-  int num_groups = LV.num_groups;
-  int threadsPerGroup = LV.threadsPerGroup;
+  const int GridSize = LV.GridSize;
+  const int WorkgroupSize = LV.WorkgroupSize;
 
   if (print_kernel_trace >= LAUNCH) {
+    int num_groups = GridSize / WorkgroupSize;
     // enum modes are SPMD, GENERIC, NONE 0,1,2
     // if doing rtl timing, print to stderr, unless stdout requested.
     bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING);
@@ -2130,7 +2131,7 @@ int32_t __tgt_rtl_run_target_team_region_locked(
             "reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u "
             "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu n:%s\n",
             device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize,
-            arg_num, num_groups, threadsPerGroup, num_teams, thread_limit,
+            arg_num, num_groups, WorkgroupSize, num_teams, thread_limit,
             group_segment_size, sgpr_count, vgpr_count, sgpr_spill_count,
             vgpr_spill_count, loop_tripcount, KernelInfo->Name);
   }
@@ -2150,11 +2151,11 @@ int32_t __tgt_rtl_run_target_team_region_locked(
 
     // packet->header is written last
     packet->setup = UINT16_C(1) << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
-    packet->workgroup_size_x = threadsPerGroup;
+    packet->workgroup_size_x = WorkgroupSize;
     packet->workgroup_size_y = 1;
     packet->workgroup_size_z = 1;
     packet->reserved0 = 0;
-    packet->grid_size_x = num_groups * threadsPerGroup;
+    packet->grid_size_x = GridSize;
     packet->grid_size_y = 1;
     packet->grid_size_z = 1;
     packet->private_segment_size = KernelInfoEntry.private_segment_size;


        


More information about the Openmp-commits mailing list