[libc-commits] [libc] [libc] Add Kernel Resource Usage to nvptx-loader (PR #97503)

via libc-commits libc-commits at lists.llvm.org
Tue Jul 2 18:06:00 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: None (jameshu15869)

<details>
<summary>Changes</summary>

This PR adds a `--print-resource-usage` flag to `nvptx-loader` that prints the register usage of the `_begin`, `_start`, and `_end` kernels when executing CUDA binaries.

Example output:
```
$ nvptx-loader --print-resource-usage libc/benchmarks/gpu/src/ctype/libc.benchmarks.gpu.src.ctype.isalnum_benchmark.__build__
[ RUN      ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper
[       OK ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper: 93 cycles, 76 min, 470 max, 23 iterations, 78000 ns, 80 stddev
_begin registers: 25
_start registers: 80
_end registers: 62
  ```

---
Full diff: https://github.com/llvm/llvm-project/pull/97503.diff


5 Files Affected:

- (modified) libc/benchmarks/gpu/CMakeLists.txt (+3) 
- (modified) libc/cmake/modules/LLVMLibCTestRules.cmake (+10) 
- (modified) libc/utils/gpu/loader/Loader.h (+1) 
- (modified) libc/utils/gpu/loader/Main.cpp (+5-1) 
- (modified) libc/utils/gpu/loader/nvptx/Loader.cpp (+18) 


``````````diff
diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index d167abcaf2db1..4790e55bec478 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -19,9 +19,12 @@ function(add_benchmark benchmark_name)
     LINK_LIBRARIES
       LibcGpuBenchmark.hermetic
       ${BENCHMARK_LINK_LIBRARIES}
+    LOADER_ARGS
+      "--print-resource-usage"
     ${BENCHMARK_UNPARSED_ARGUMENTS}
   )
   get_fq_target_name(${benchmark_name} fq_target_name)
+
   add_dependencies(gpu-benchmark ${fq_target_name})
 endfunction(add_benchmark)
 
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index fbeec32883b63..52bc2ad03a30d 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -709,12 +709,22 @@ function(add_libc_hermetic test_name)
       $<TARGET_FILE:${fq_build_target_name}> ${HERMETIC_TEST_ARGS})
   add_custom_target(
     ${fq_target_name}
+    DEPENDS ${fq_target_name}-cmd
+  )
+
+  add_custom_command(
+    OUTPUT ${fq_target_name}-cmd
     COMMAND ${test_cmd}
     COMMAND_EXPAND_LISTS
     COMMENT "Running hermetic test ${fq_target_name}"
     ${LIBC_HERMETIC_TEST_JOB_POOL}
   )
 
+  set_source_files_properties(${fq_target_name}-cmd
+    PROPERTIES
+      SYMBOLIC "TRUE"
+  )
+
   add_dependencies(${HERMETIC_TEST_SUITE} ${fq_target_name})
   if(NOT ${HERMETIC_TEST_IS_BENCHMARK})
     # If it is a benchmark, it will already have been added to the
diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h
index eae2776b2773f..f576c58d902a1 100644
--- a/libc/utils/gpu/loader/Loader.h
+++ b/libc/utils/gpu/loader/Loader.h
@@ -28,6 +28,7 @@ struct LaunchParameters {
   uint32_t num_blocks_x;
   uint32_t num_blocks_y;
   uint32_t num_blocks_z;
+  bool print_resource_usage;
 };
 
 /// The arguments to the '_begin' kernel.
diff --git a/libc/utils/gpu/loader/Main.cpp b/libc/utils/gpu/loader/Main.cpp
index b711ec91c9f30..dfaee4d857826 100644
--- a/libc/utils/gpu/loader/Main.cpp
+++ b/libc/utils/gpu/loader/Main.cpp
@@ -20,7 +20,8 @@
 
 int main(int argc, char **argv, char **envp) {
   if (argc < 2) {
-    printf("USAGE: ./loader [--threads <n>, --blocks <n>] <device_image> "
+    printf("USAGE: ./loader [--threads <n>, --blocks <n>, "
+           "--print-resource-usage] <device_image> "
            "<args>, ...\n");
     return EXIT_SUCCESS;
   }
@@ -62,6 +63,9 @@ int main(int argc, char **argv, char **envp) {
           offset + 1 < argc ? strtoul(argv[offset + 1], &ptr, 10) : 1;
       offset++;
       continue;
+    } else if (argv[offset] == std::string("--print-resource-usage")) {
+      params.print_resource_usage = true;
+      continue;
     } else {
       file = fopen(argv[offset], "r");
       if (!file) {
diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp
index 012cb778ecf15..90e52ddb008da 100644
--- a/libc/utils/gpu/loader/nvptx/Loader.cpp
+++ b/libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -229,6 +229,17 @@ CUresult launch_kernel(CUmodule binary, CUstream stream,
   return CUDA_SUCCESS;
 }
 
+void print_resource_usage(CUmodule binary, const char *kernel_name) {
+  CUfunction function;
+  if (CUresult err = cuModuleGetFunction(&function, binary, kernel_name))
+    handle_error(err);
+  int num_regs;
+  if (CUresult err =
+          cuFuncGetAttribute(&num_regs, CU_FUNC_ATTRIBUTE_NUM_REGS, function))
+    handle_error(err);
+  fprintf(stderr, "%6s registers: %d\n", kernel_name, num_regs);
+}
+
 int load(int argc, char **argv, char **envp, void *image, size_t size,
          const LaunchParameters &params) {
   if (CUresult err = cuInit(0))
@@ -341,6 +352,13 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
   if (CUresult err = cuStreamSynchronize(stream))
     handle_error(err);
 
+  // Print resource usage if requested.
+  if (params.print_resource_usage) {
+    print_resource_usage(binary, "_begin");
+    print_resource_usage(binary, "_start");
+    print_resource_usage(binary, "_end");
+  }
+
   end_args_t fini_args = {host_ret};
   if (CUresult err = launch_kernel(binary, stream, rpc_device,
                                    single_threaded_params, "_end", fini_args))

``````````

</details>


https://github.com/llvm/llvm-project/pull/97503


More information about the libc-commits mailing list