[Openmp-commits] [PATCH] D105237: [libomptarget][nfc] Move grid size computation

Jon Chesterfield via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Wed Jun 30 15:16:43 PDT 2021


JonChesterfield created this revision.
JonChesterfield added reviewers: gregrodgers, ronlieb, dhruvachak, pdhaliwal.
Herald added subscribers: kerbowa, nhaehnle, jvesely.
JonChesterfield requested review of this revision.
Herald added a project: OpenMP.
Herald added a subscriber: openmp-commits.

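Change getLaunchVals to return the integers used directly for the kernel launch: the workgroup size and the total grid size (workgroup size multiplied by the number of groups).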


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D105237

Files:
  openmp/libomptarget/plugins/amdgpu/src/rtl.cpp


Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1885,8 +1885,8 @@
 //         EnvTeamLimit, EnvNumTeams, num_teams, thread_limit,
 //         loop_tripcount.
 struct launchVals {
-  int threadsPerGroup;
-  int num_groups;
+  int WorkgroupSize;
+  int GridSize;
 };
 
 launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
@@ -2030,8 +2030,8 @@
      threadsPerGroup);
 
   launchVals res;
-  res.threadsPerGroup = threadsPerGroup;
-  res.num_groups = num_groups;
+  res.WorkgroupSize = threadsPerGroup;
+  res.GridSize = threadsPerGroup * num_groups;
   return res;
 }
 
@@ -2117,10 +2117,11 @@
                     thread_limit,   // From run_region arg
                     loop_tripcount, // From run_region arg
                     DeviceInfo.NumTeams[KernelInfo->device_id]);
-  int num_groups = LV.num_groups;
-  int threadsPerGroup = LV.threadsPerGroup;
+  const int GridSize = LV.GridSize;
+  const int WorkgroupSize = LV.WorkgroupSize;
 
   if (print_kernel_trace >= LAUNCH) {
+    int num_groups = GridSize / WorkgroupSize;
     // enum modes are SPMD, GENERIC, NONE 0,1,2
     // if doing rtl timing, print to stderr, unless stdout requested.
     bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING);
@@ -2129,7 +2130,7 @@
             "reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u "
             "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu n:%s\n",
             device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize,
-            arg_num, num_groups, threadsPerGroup, num_teams, thread_limit,
+            arg_num, num_groups, WorkgroupSize, num_teams, thread_limit,
             group_segment_size, sgpr_count, vgpr_count, sgpr_spill_count,
             vgpr_spill_count, loop_tripcount, KernelInfo->Name);
   }
@@ -2149,11 +2150,11 @@
 
     // packet->header is written last
     packet->setup = UINT16_C(1) << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
-    packet->workgroup_size_x = threadsPerGroup;
+    packet->workgroup_size_x = WorkgroupSize;
     packet->workgroup_size_y = 1;
     packet->workgroup_size_z = 1;
     packet->reserved0 = 0;
-    packet->grid_size_x = num_groups * threadsPerGroup;
+    packet->grid_size_x = GridSize;
     packet->grid_size_y = 1;
     packet->grid_size_z = 1;
     packet->private_segment_size = KernelInfoEntry.private_segment_size;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D105237.355702.patch
Type: text/x-patch
Size: 2548 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20210630/415f2436/attachment.bin>


More information about the Openmp-commits mailing list