[libc-commits] [libc] [amdgpu][openmp] Avoiding writing to packet header twice (PR #70695)
via libc-commits
libc-commits at lists.llvm.org
Mon Oct 30 10:55:07 PDT 2023
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 0f8615f4dc568f4d7cbf73580eef3e78f64f3bd0 cddc917c07ace026a66d8080cddea32509b0aec0 -- libc/utils/gpu/loader/amdgpu/Loader.cpp openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 80f513bb80f8..86a39a5a3be4 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -223,8 +223,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
// with one thread on the device, forcing the rest of the wavefront to be
// masked off.
uint16_t setup = (1 + (params.num_blocks_y * params.num_threads_y != 1) +
- (params.num_blocks_z * params.num_threads_z != 1))
- << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+ (params.num_blocks_z * params.num_threads_z != 1))
+ << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
packet->workgroup_size_x = params.num_threads_x;
packet->workgroup_size_y = params.num_threads_y;
packet->workgroup_size_z = params.num_threads_z;
@@ -249,8 +249,9 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
- uint32_t header_word = header | (setup << 16u)
- __atomic_store_n((uint32_t*)&packet->header, header_word, __ATOMIC_RELEASE);
+ uint32_t header_word =
+ header | (setup << 16u) __atomic_store_n((uint32_t *)&packet->header,
+ header_word, __ATOMIC_RELEASE);
hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);
// Wait until the kernel has completed execution on the device. Periodically
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index b763c6371cc3..b510922396ca 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -744,8 +744,7 @@ private:
/// Publish the kernel packet so that the HSA runtime can start processing
/// the kernel launch. Do not modify the packet once this function is called.
/// Assumes the queue lock is acquired.
- void publishKernelPacket(uint64_t PacketId,
- uint16_t Setup,
+ void publishKernelPacket(uint64_t PacketId, uint16_t Setup,
hsa_kernel_dispatch_packet_t *Packet) {
uint32_t *PacketPtr = reinterpret_cast<uint32_t *>(Packet);
``````````
</details>
https://github.com/llvm/llvm-project/pull/70695
More information about the libc-commits mailing list