[llvm] [openmp][nfc] Use clang gpuintrin for some dispatch to target intrinsics (PR #131907)
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 13:32:59 PDT 2025
https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/131907
Picked a few straightforward ones to get the ball moving, left the UNREACHABLE path unchanged.
>From 4d29d2e8d6f264adc82779071b7d09d552873460 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield at gmail.com>
Date: Tue, 18 Mar 2025 20:29:05 +0000
Subject: [PATCH] [openmp][nfc] Use clang gpuintrin for some dispatch to target
intrinsics
---
offload/DeviceRTL/src/Mapping.cpp | 86 +++++++------------------------
1 file changed, 18 insertions(+), 68 deletions(-)
diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index 641be81cca3ed..53031cbeaa696 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -16,6 +16,7 @@
#include "State.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
+#include "clang/lib/Headers/gpuintrin.h"
using namespace ompx;
@@ -27,22 +28,6 @@ namespace impl {
///{
#ifdef __AMDGPU__
-uint32_t getWarpSize() { return __builtin_amdgcn_wavefrontsize(); }
-
-uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
- switch (Dim) {
- case 0:
- return __builtin_amdgcn_workgroup_size_x();
- case 1:
- return __builtin_amdgcn_workgroup_size_y();
- case 2:
- return __builtin_amdgcn_workgroup_size_z();
- };
- UNREACHABLE("Dim outside range!");
-}
-
-LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
-
LaneMaskTy lanemaskLT() {
uint32_t Lane = mapping::getThreadIdInWarp();
int64_t Ballot = mapping::activemask();
@@ -59,22 +44,6 @@ LaneMaskTy lanemaskGT() {
return Mask & Ballot;
}
-uint32_t getThreadIdInWarp() {
- return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
-uint32_t getThreadIdInBlock(int32_t Dim) {
- switch (Dim) {
- case 0:
- return __builtin_amdgcn_workitem_id_x();
- case 1:
- return __builtin_amdgcn_workitem_id_y();
- case 2:
- return __builtin_amdgcn_workitem_id_z();
- };
- UNREACHABLE("Dim outside range!");
-}
-
uint32_t getNumberOfThreadsInKernel() {
return __builtin_amdgcn_grid_size_x() * __builtin_amdgcn_grid_size_y() *
__builtin_amdgcn_grid_size_z();
@@ -120,40 +89,10 @@ uint32_t getNumberOfWarpsInBlock() {
///{
#ifdef __NVPTX__
-uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
- switch (Dim) {
- case 0:
- return __nvvm_read_ptx_sreg_ntid_x();
- case 1:
- return __nvvm_read_ptx_sreg_ntid_y();
- case 2:
- return __nvvm_read_ptx_sreg_ntid_z();
- };
- UNREACHABLE("Dim outside range!");
-}
-
-uint32_t getWarpSize() { return __nvvm_read_ptx_sreg_warpsize(); }
-
-LaneMaskTy activemask() { return __nvvm_activemask(); }
-
LaneMaskTy lanemaskLT() { return __nvvm_read_ptx_sreg_lanemask_lt(); }
LaneMaskTy lanemaskGT() { return __nvvm_read_ptx_sreg_lanemask_gt(); }
-uint32_t getThreadIdInBlock(int32_t Dim) {
- switch (Dim) {
- case 0:
- return __nvvm_read_ptx_sreg_tid_x();
- case 1:
- return __nvvm_read_ptx_sreg_tid_y();
- case 2:
- return __nvvm_read_ptx_sreg_tid_z();
- };
- UNREACHABLE("Dim outside range!");
-}
-
-uint32_t getThreadIdInWarp() { return __nvvm_read_ptx_sreg_laneid(); }
-
uint32_t getBlockIdInKernel(int32_t Dim) {
switch (Dim) {
case 0:
@@ -236,24 +175,29 @@ bool mapping::isLeaderInWarp() {
return utils::popc(Active & LaneMaskLT) == 0;
}
-LaneMaskTy mapping::activemask() { return impl::activemask(); }
+LaneMaskTy mapping::activemask() { return __gpu_lane_mask(); }
LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
uint32_t mapping::getThreadIdInWarp() {
- uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
+ uint32_t ThreadIdInWarp = __gpu_lane_id();
ASSERT(ThreadIdInWarp < impl::getWarpSize(), nullptr);
return ThreadIdInWarp;
}
uint32_t mapping::getThreadIdInBlock(int32_t Dim) {
- uint32_t ThreadIdInBlock = impl::getThreadIdInBlock(Dim);
- return ThreadIdInBlock;
+ switch (Dim) {
+ case 0:
+ case 1:
+ case 2:
+ return __gpu_thread_id(Dim);
+ };
+ UNREACHABLE("Dim outside range!");
}
-uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
+uint32_t mapping::getWarpSize() { return __gpu_num_lanes(); }
uint32_t mapping::getMaxTeamThreads(bool IsSPMD) {
uint32_t BlockSize = mapping::getNumberOfThreadsInBlock();
@@ -265,7 +209,13 @@ uint32_t mapping::getMaxTeamThreads() {
}
uint32_t mapping::getNumberOfThreadsInBlock(int32_t Dim) {
- return impl::getNumberOfThreadsInBlock(Dim);
+ switch (Dim) {
+ case 0:
+ case 1:
+ case 2:
+ return __gpu_num_threads(Dim);
+ };
+ UNREACHABLE("Dim outside range!");
}
uint32_t mapping::getNumberOfThreadsInKernel() {
More information about the llvm-commits
mailing list