[libc-commits] [libc] [libc] Add Kernel Resource Usage to nvptx-loader (PR #97503)
via libc-commits
libc-commits at lists.llvm.org
Tue Jul 2 18:05:30 PDT 2024
https://github.com/jameshu15869 created https://github.com/llvm/llvm-project/pull/97503
This PR allows `nvptx-loader` to read the resource usage of `_start`, `_begin`, and `_end` when executing CUDA binaries.
Example output:
```
$ nvptx-loader --print-resource-usage libc/benchmarks/gpu/src/ctype/libc.benchmarks.gpu.src.ctype.isalnum_benchmark.__build__
[ RUN ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper
[ OK ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper: 93 cycles, 76 min, 470 max, 23 iterations, 78000 ns, 80 stddev
_begin registers: 25
_start registers: 80
_end registers: 62
```
From bd53bf13630bc84492684940ef834677ba0e320e Mon Sep 17 00:00:00 2001
From: jameshu15869 <jhudson15869 at gmail.com>
Date: Wed, 26 Jun 2024 18:33:38 -0400
Subject: [PATCH 1/2] support basic resource usage using cuobjdump
---
libc/benchmarks/gpu/CMakeLists.txt | 17 +++++++++++++++++
libc/cmake/modules/LLVMLibCTestRules.cmake | 10 ++++++++++
2 files changed, 27 insertions(+)
diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index d167abcaf2db1..7f006329df198 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -22,6 +22,23 @@ function(add_benchmark benchmark_name)
${BENCHMARK_UNPARSED_ARGUMENTS}
)
get_fq_target_name(${benchmark_name} fq_target_name)
+ set(fq_build_target_name ${fq_target_name}.__build__)
+
+ set(grep_res_usage_cmd grep -A 3 "${benchmark_name}")
+ set(res_usage_cmd cuobjdump -res-usage $<TARGET_FILE:${fq_build_target_name}> | ${grep_res_usage_cmd})
+ add_custom_command(
+ OUTPUT ${fq_target_name}-cmd APPEND
+ COMMAND ${res_usage_cmd}
+ COMMAND_EXPAND_LISTS
+ COMMENT "Reading resource usage for ${benchmark_name}"
+ ${LIBC_HERMETIC_TEST_JOB_POOL}
+ )
+ # add_dependencies(${fq_target_name} ${fq_target_name}_resources)
+ # set_source_files_properties(${fq_target_name}_resources
+ # PROPERTIES
+ # SYMBOLIC "TRUE"
+ # )
+
add_dependencies(gpu-benchmark ${fq_target_name})
endfunction(add_benchmark)
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index fbeec32883b63..52bc2ad03a30d 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -709,12 +709,22 @@ function(add_libc_hermetic test_name)
$<TARGET_FILE:${fq_build_target_name}> ${HERMETIC_TEST_ARGS})
add_custom_target(
${fq_target_name}
+ DEPENDS ${fq_target_name}-cmd
+ )
+
+ add_custom_command(
+ OUTPUT ${fq_target_name}-cmd
COMMAND ${test_cmd}
COMMAND_EXPAND_LISTS
COMMENT "Running hermetic test ${fq_target_name}"
${LIBC_HERMETIC_TEST_JOB_POOL}
)
+ set_source_files_properties(${fq_target_name}-cmd
+ PROPERTIES
+ SYMBOLIC "TRUE"
+ )
+
add_dependencies(${HERMETIC_TEST_SUITE} ${fq_target_name})
if(NOT ${HERMETIC_TEST_IS_BENCHMARK})
# If it is a benchmark, it will already have been added to the
From 415098692de3b9489eb7e1791e7832e93dc7eaab Mon Sep 17 00:00:00 2001
From: jameshu15869 <jhudson15869 at gmail.com>
Date: Tue, 2 Jul 2024 21:02:09 -0400
Subject: [PATCH 2/2] add resource usage to nvptx-loader
---
libc/benchmarks/gpu/CMakeLists.txt | 18 ++----------------
libc/utils/gpu/loader/Loader.h | 1 +
libc/utils/gpu/loader/Main.cpp | 6 +++++-
libc/utils/gpu/loader/nvptx/Loader.cpp | 18 ++++++++++++++++++
4 files changed, 26 insertions(+), 17 deletions(-)
diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index 7f006329df198..4790e55bec478 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -19,25 +19,11 @@ function(add_benchmark benchmark_name)
LINK_LIBRARIES
LibcGpuBenchmark.hermetic
${BENCHMARK_LINK_LIBRARIES}
+ LOADER_ARGS
+ "--print-resource-usage"
${BENCHMARK_UNPARSED_ARGUMENTS}
)
get_fq_target_name(${benchmark_name} fq_target_name)
- set(fq_build_target_name ${fq_target_name}.__build__)
-
- set(grep_res_usage_cmd grep -A 3 "${benchmark_name}")
- set(res_usage_cmd cuobjdump -res-usage $<TARGET_FILE:${fq_build_target_name}> | ${grep_res_usage_cmd})
- add_custom_command(
- OUTPUT ${fq_target_name}-cmd APPEND
- COMMAND ${res_usage_cmd}
- COMMAND_EXPAND_LISTS
- COMMENT "Reading resource usage for ${benchmark_name}"
- ${LIBC_HERMETIC_TEST_JOB_POOL}
- )
- # add_dependencies(${fq_target_name} ${fq_target_name}_resources)
- # set_source_files_properties(${fq_target_name}_resources
- # PROPERTIES
- # SYMBOLIC "TRUE"
- # )
add_dependencies(gpu-benchmark ${fq_target_name})
endfunction(add_benchmark)
diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h
index eae2776b2773f..f576c58d902a1 100644
--- a/libc/utils/gpu/loader/Loader.h
+++ b/libc/utils/gpu/loader/Loader.h
@@ -28,6 +28,7 @@ struct LaunchParameters {
uint32_t num_blocks_x;
uint32_t num_blocks_y;
uint32_t num_blocks_z;
+ bool print_resource_usage;
};
/// The arguments to the '_begin' kernel.
diff --git a/libc/utils/gpu/loader/Main.cpp b/libc/utils/gpu/loader/Main.cpp
index b711ec91c9f30..dfaee4d857826 100644
--- a/libc/utils/gpu/loader/Main.cpp
+++ b/libc/utils/gpu/loader/Main.cpp
@@ -20,7 +20,8 @@
int main(int argc, char **argv, char **envp) {
if (argc < 2) {
- printf("USAGE: ./loader [--threads <n>, --blocks <n>] <device_image> "
+ printf("USAGE: ./loader [--threads <n>, --blocks <n>, "
+ "--print-resource-usage] <device_image> "
"<args>, ...\n");
return EXIT_SUCCESS;
}
@@ -62,6 +63,9 @@ int main(int argc, char **argv, char **envp) {
offset + 1 < argc ? strtoul(argv[offset + 1], &ptr, 10) : 1;
offset++;
continue;
+ } else if (argv[offset] == std::string("--print-resource-usage")) {
+ params.print_resource_usage = true;
+ continue;
} else {
file = fopen(argv[offset], "r");
if (!file) {
diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp
index 012cb778ecf15..90e52ddb008da 100644
--- a/libc/utils/gpu/loader/nvptx/Loader.cpp
+++ b/libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -229,6 +229,17 @@ CUresult launch_kernel(CUmodule binary, CUstream stream,
return CUDA_SUCCESS;
}
+void print_resource_usage(CUmodule binary, const char *kernel_name) {
+ CUfunction function;
+ if (CUresult err = cuModuleGetFunction(&function, binary, kernel_name))
+ handle_error(err);
+ int num_regs;
+ if (CUresult err =
+ cuFuncGetAttribute(&num_regs, CU_FUNC_ATTRIBUTE_NUM_REGS, function))
+ handle_error(err);
+ fprintf(stderr, "%6s registers: %d\n", kernel_name, num_regs);
+}
+
int load(int argc, char **argv, char **envp, void *image, size_t size,
const LaunchParameters &params) {
if (CUresult err = cuInit(0))
@@ -341,6 +352,13 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
if (CUresult err = cuStreamSynchronize(stream))
handle_error(err);
+ // Print resource usage if requested.
+ if (params.print_resource_usage) {
+ print_resource_usage(binary, "_begin");
+ print_resource_usage(binary, "_start");
+ print_resource_usage(binary, "_end");
+ }
+
end_args_t fini_args = {host_ret};
if (CUresult err = launch_kernel(binary, stream, rpc_device,
single_threaded_params, "_end", fini_args))
More information about the libc-commits
mailing list