[libc-commits] [libc] [libc] Add single threaded kernel attributes to AMDGPU startup utility (PR #104651)
Joseph Huber via libc-commits
libc-commits at lists.llvm.org
Sat Aug 17 05:20:17 PDT 2024
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/104651
>From 6384b983e370065d4474d10eccf87bbfa742c024 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 16 Aug 2024 18:43:44 -0500
Subject: [PATCH 1/2] [libc] Add single threaded kernel attributes to AMDGPU
startup utility
Summary:
I recently fixed the errors affecting these attributes, so I can now
actually use them. This shouldn't impact much; it should hopefully just
make the generated code slightly better.
---
libc/startup/gpu/amdgpu/start.cpp | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index 5aaa7e938d2792..f6f7ee39e32fab 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -41,7 +41,10 @@ static void call_fini_array_callbacks() {
} // namespace LIBC_NAMESPACE_DECL
-extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
+extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
+ clang::amdgpu_flat_work_group_size(1, 1),
+ clang::amdgpu_max_num_work_groups(1),
+ clang::amdgpu_waves_per_eu(1, 1)]] void
_begin(int argc, char **argv, char **env) {
__atomic_store_n(&LIBC_NAMESPACE::app.env_ptr,
reinterpret_cast<uintptr_t *>(env), __ATOMIC_RELAXED);
@@ -60,7 +63,10 @@ _start(int argc, char **argv, char **envp, int *ret) {
__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
}
-extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
+extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
+ clang::amdgpu_flat_work_group_size(1, 1),
+ clang::amdgpu_max_num_work_groups(1),
+ clang::amdgpu_waves_per_eu(1, 1)]] void
_end(int retval) {
// Only a single thread should call `exit` here, the rest should gracefully
// return from the kernel. This is so only one thread calls the destructors
>From 921541ddd995c5abb46359178042b9f9bd7771c7 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Sat, 17 Aug 2024 07:20:08 -0500
Subject: [PATCH 2/2] Update start.cpp
---
libc/startup/gpu/amdgpu/start.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index f6f7ee39e32fab..f09541b0d55808 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -43,8 +43,7 @@ static void call_fini_array_callbacks() {
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
clang::amdgpu_flat_work_group_size(1, 1),
- clang::amdgpu_max_num_work_groups(1),
- clang::amdgpu_waves_per_eu(1, 1)]] void
+ clang::amdgpu_max_num_work_groups(1)]] void
_begin(int argc, char **argv, char **env) {
__atomic_store_n(&LIBC_NAMESPACE::app.env_ptr,
reinterpret_cast<uintptr_t *>(env), __ATOMIC_RELAXED);
@@ -65,8 +64,7 @@ _start(int argc, char **argv, char **envp, int *ret) {
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
clang::amdgpu_flat_work_group_size(1, 1),
- clang::amdgpu_max_num_work_groups(1),
- clang::amdgpu_waves_per_eu(1, 1)]] void
+ clang::amdgpu_max_num_work_groups(1)]] void
_end(int retval) {
// Only a single thread should call `exit` here, the rest should gracefully
// return from the kernel. This is so only one thread calls the destructors
More information about the libc-commits mailing list