[Openmp-commits] [openmp] 626a31d - [libomptarget] Add register usage info to kernel metadata

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Thu Mar 18 10:01:10 PDT 2021


Author: Jon Chesterfield
Date: 2021-03-18T17:00:42Z
New Revision: 626a31de15212a0e0c25df8435753cb9a0684668

URL: https://github.com/llvm/llvm-project/commit/626a31de15212a0e0c25df8435753cb9a0684668
DIFF: https://github.com/llvm/llvm-project/commit/626a31de15212a0e0c25df8435753cb9a0684668.diff

LOG: [libomptarget] Add register usage info to kernel metadata

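Add register usage information (SGPR and VGPR counts, plus SGPR and VGPR spill counts) to the runtime kernel metadata so that it can be used during kernel launch (that change will be in a different commit). Also print this information, together with the loop trip count, in the kernel trace.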

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D98829

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/impl/internal.h
    openmp/libomptarget/plugins/amdgpu/impl/system.cpp
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
index 1b1d69328785..8ca66a9d478e 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -97,6 +97,10 @@ typedef struct atl_kernel_info_s {
   uint64_t kernel_object;
   uint32_t group_segment_size;
   uint32_t private_segment_size;
+  uint32_t sgpr_count;
+  uint32_t vgpr_count;
+  uint32_t sgpr_spill_count;
+  uint32_t vgpr_spill_count;
   uint32_t kernel_segment_size;
   uint32_t num_args;
   std::vector<uint64_t> arg_alignments;

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
index da152b4045d1..d6cde1f699c2 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -832,7 +832,31 @@ static hsa_status_t get_code_object_custom_metadata(void *binary,
     msgpack_errors += map_lookup_string(element, ".symbol", &symbolName);
     msgpackErrorCheck(strings lookup in kernel metadata, msgpack_errors);
 
-    atl_kernel_info_t info = {0, 0, 0, 0, 0, {}, {}, {}};
+    atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, {}, {}, {}};
+
+    uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
+    msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
+    msgpackErrorCheck(sgpr count metadata lookup in kernel metadata,
+                      msgpack_errors);
+    info.sgpr_count = sgpr_count;
+
+    msgpack_errors += map_lookup_uint64_t(element, ".vgpr_count", &vgpr_count);
+    msgpackErrorCheck(vgpr count metadata lookup in kernel metadata,
+                      msgpack_errors);
+    info.vgpr_count = vgpr_count;
+
+    msgpack_errors +=
+        map_lookup_uint64_t(element, ".sgpr_spill_count", &sgpr_spill_count);
+    msgpackErrorCheck(sgpr spill count metadata lookup in kernel metadata,
+                      msgpack_errors);
+    info.sgpr_spill_count = sgpr_spill_count;
+
+    msgpack_errors +=
+        map_lookup_uint64_t(element, ".vgpr_spill_count", &vgpr_spill_count);
+    msgpackErrorCheck(vgpr spill count metadata lookup in kernel metadata,
+                      msgpack_errors);
+    info.vgpr_spill_count = vgpr_spill_count;
+
     size_t kernel_explicit_args_size = 0;
     uint64_t kernel_segment_size;
     msgpack_errors += map_lookup_uint64_t(element, ".kernarg_segment_size",

diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 0e8df9e9ca60..a6b426dc0557 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1759,6 +1759,19 @@ int32_t __tgt_rtl_run_target_team_region_locked(
 
   KernelTy *KernelInfo = (KernelTy *)tgt_entry_ptr;
 
+  std::string kernel_name = std::string(KernelInfo->Name);
+  uint32_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
+
+  {
+    assert(KernelInfoTable[device_id].find(kernel_name) !=
+           KernelInfoTable[device_id].end());
+    auto it = KernelInfoTable[device_id][kernel_name];
+    sgpr_count = it.sgpr_count;
+    vgpr_count = it.vgpr_count;
+    sgpr_spill_count = it.sgpr_spill_count;
+    vgpr_spill_count = it.vgpr_spill_count;
+  }
+
   /*
    * Set limit based on ThreadsPerGroup and GroupsPerDevice
    */
@@ -1780,10 +1793,12 @@ int32_t __tgt_rtl_run_target_team_region_locked(
     bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING);
     fprintf(traceToStdout ? stdout : stderr,
             "DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) "
-            "reqd:(%4dX%4d) n:%s\n",
+            "reqd:(%4dX%4d) sgpr_count:%u vgpr_count:%u sgpr_spill_count:%u "
+            "vgpr_spill_count:%u tripcount:%lu n:%s\n",
             device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize,
             arg_num, num_groups, threadsPerGroup, num_teams, thread_limit,
-            KernelInfo->Name);
+            sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count,
+            loop_tripcount, KernelInfo->Name);
   }
 
   // Run on the device.
@@ -1812,7 +1827,6 @@ int32_t __tgt_rtl_run_target_team_region_locked(
     packet->reserved2 = 0;           // atmi writes id_ here
     packet->completion_signal = {0}; // may want a pool of signals
 
-    std::string kernel_name = std::string(KernelInfo->Name);
     {
       assert(KernelInfoTable[device_id].find(kernel_name) !=
              KernelInfoTable[device_id].end());


        


More information about the Openmp-commits mailing list