[llvm] [openmp][nfc] Use clang gpuintrin for some dispatch to target intrinsics (PR #131907)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 13:33:32 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Jon Chesterfield (JonChesterfield)
<details>
<summary>Changes</summary>
Picked a few straightforward ones to get the ball moving, left the UNREACHABLE path unchanged.
---
Full diff: https://github.com/llvm/llvm-project/pull/131907.diff
1 Files Affected:
- (modified) offload/DeviceRTL/src/Mapping.cpp (+18-68)
``````````diff
diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index 641be81cca3ed..53031cbeaa696 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -16,6 +16,7 @@
#include "State.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
+#include "clang/lib/Headers/gpuintrin.h"
using namespace ompx;
@@ -27,22 +28,6 @@ namespace impl {
///{
#ifdef __AMDGPU__
-uint32_t getWarpSize() { return __builtin_amdgcn_wavefrontsize(); }
-
-uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
- switch (Dim) {
- case 0:
- return __builtin_amdgcn_workgroup_size_x();
- case 1:
- return __builtin_amdgcn_workgroup_size_y();
- case 2:
- return __builtin_amdgcn_workgroup_size_z();
- };
- UNREACHABLE("Dim outside range!");
-}
-
-LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
-
LaneMaskTy lanemaskLT() {
uint32_t Lane = mapping::getThreadIdInWarp();
int64_t Ballot = mapping::activemask();
@@ -59,22 +44,6 @@ LaneMaskTy lanemaskGT() {
return Mask & Ballot;
}
-uint32_t getThreadIdInWarp() {
- return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
-uint32_t getThreadIdInBlock(int32_t Dim) {
- switch (Dim) {
- case 0:
- return __builtin_amdgcn_workitem_id_x();
- case 1:
- return __builtin_amdgcn_workitem_id_y();
- case 2:
- return __builtin_amdgcn_workitem_id_z();
- };
- UNREACHABLE("Dim outside range!");
-}
-
uint32_t getNumberOfThreadsInKernel() {
return __builtin_amdgcn_grid_size_x() * __builtin_amdgcn_grid_size_y() *
__builtin_amdgcn_grid_size_z();
@@ -120,40 +89,10 @@ uint32_t getNumberOfWarpsInBlock() {
///{
#ifdef __NVPTX__
-uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
- switch (Dim) {
- case 0:
- return __nvvm_read_ptx_sreg_ntid_x();
- case 1:
- return __nvvm_read_ptx_sreg_ntid_y();
- case 2:
- return __nvvm_read_ptx_sreg_ntid_z();
- };
- UNREACHABLE("Dim outside range!");
-}
-
-uint32_t getWarpSize() { return __nvvm_read_ptx_sreg_warpsize(); }
-
-LaneMaskTy activemask() { return __nvvm_activemask(); }
-
LaneMaskTy lanemaskLT() { return __nvvm_read_ptx_sreg_lanemask_lt(); }
LaneMaskTy lanemaskGT() { return __nvvm_read_ptx_sreg_lanemask_gt(); }
-uint32_t getThreadIdInBlock(int32_t Dim) {
- switch (Dim) {
- case 0:
- return __nvvm_read_ptx_sreg_tid_x();
- case 1:
- return __nvvm_read_ptx_sreg_tid_y();
- case 2:
- return __nvvm_read_ptx_sreg_tid_z();
- };
- UNREACHABLE("Dim outside range!");
-}
-
-uint32_t getThreadIdInWarp() { return __nvvm_read_ptx_sreg_laneid(); }
-
uint32_t getBlockIdInKernel(int32_t Dim) {
switch (Dim) {
case 0:
@@ -236,24 +175,29 @@ bool mapping::isLeaderInWarp() {
return utils::popc(Active & LaneMaskLT) == 0;
}
-LaneMaskTy mapping::activemask() { return impl::activemask(); }
+LaneMaskTy mapping::activemask() { return __gpu_lane_mask(); }
LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
uint32_t mapping::getThreadIdInWarp() {
- uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
+ uint32_t ThreadIdInWarp = __gpu_lane_id();
ASSERT(ThreadIdInWarp < impl::getWarpSize(), nullptr);
return ThreadIdInWarp;
}
uint32_t mapping::getThreadIdInBlock(int32_t Dim) {
- uint32_t ThreadIdInBlock = impl::getThreadIdInBlock(Dim);
- return ThreadIdInBlock;
+ switch (Dim) {
+ case 0:
+ case 1:
+ case 2:
+ return __gpu_thread_id(Dim);
+ };
+ UNREACHABLE("Dim outside range!");
}
-uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
+uint32_t mapping::getWarpSize() { return __gpu_num_lanes(); }
uint32_t mapping::getMaxTeamThreads(bool IsSPMD) {
uint32_t BlockSize = mapping::getNumberOfThreadsInBlock();
@@ -265,7 +209,13 @@ uint32_t mapping::getMaxTeamThreads() {
}
uint32_t mapping::getNumberOfThreadsInBlock(int32_t Dim) {
- return impl::getNumberOfThreadsInBlock(Dim);
+ switch (Dim) {
+ case 0:
+ case 1:
+ case 2:
+ return __gpu_num_threads(Dim);
+ };
+ UNREACHABLE("Dim outside range!");
}
uint32_t mapping::getNumberOfThreadsInKernel() {
``````````
</details>
https://github.com/llvm/llvm-project/pull/131907
More information about the llvm-commits
mailing list