[Openmp-commits] [openmp] 93bebdc - [OpenMP][NFCI] Cleanup new device RT mapping interface
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Thu Nov 4 15:55:17 PDT 2021
Author: Johannes Doerfert
Date: 2021-11-04T17:54:53-05:00
New Revision: 93bebdc78f12459a85eb94eccbaf50044ac4bd5d
URL: https://github.com/llvm/llvm-project/commit/93bebdc78f12459a85eb94eccbaf50044ac4bd5d
DIFF: https://github.com/llvm/llvm-project/commit/93bebdc78f12459a85eb94eccbaf50044ac4bd5d.diff
LOG: [OpenMP][NFCI] Cleanup new device RT mapping interface
Minimize the `impl` interface and clean up some uses of mapping
functions.
Reviewed By: jhuber6
Differential Revision: https://reviews.llvm.org/D112154
Added:
Modified:
openmp/libomptarget/DeviceRTL/src/Mapping.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index be937d1ca69e..bece29489a6d 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "Mapping.h"
+#include "Interface.h"
#include "State.h"
#include "Types.h"
#include "Utils.h"
@@ -43,6 +44,12 @@ uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
return (r < group_size) ? r : group_size;
}
+uint32_t getNumHardwareThreadsInBlock() {
+ return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
+ __builtin_amdgcn_grid_size_x(),
+ __builtin_amdgcn_workgroup_size_x());
+}
+
LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
LaneMaskTy lanemaskLT() {
@@ -67,13 +74,6 @@ uint32_t getThreadIdInWarp() {
uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
-uint32_t getBlockSize() {
- // TODO: verify this logic for generic mode.
- return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
- __builtin_amdgcn_grid_size_x(),
- __builtin_amdgcn_workgroup_size_x());
-}
-
uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
@@ -83,12 +83,8 @@ uint32_t getNumberOfBlocks() {
__builtin_amdgcn_workgroup_size_x());
}
-uint32_t getNumberOfProcessorElements() {
- return getBlockSize();
-}
-
uint32_t getWarpId() {
- return mapping::getThreadIdInBlock() / mapping::getWarpSize();
+ return impl::getThreadIdInBlock() / mapping::getWarpSize();
}
uint32_t getNumberOfWarpsInBlock() {
@@ -104,6 +100,10 @@ uint32_t getNumberOfWarpsInBlock() {
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+uint32_t getNumHardwareThreadsInBlock() {
+ return __nvvm_read_ptx_sreg_ntid_x();
+}
+
constexpr const llvm::omp::GV &getGridValue() {
return llvm::omp::NVPTXGridValues;
}
@@ -126,29 +126,23 @@ LaneMaskTy lanemaskGT() {
return Res;
}
-uint32_t getThreadIdInWarp() {
- return mapping::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
-}
-
uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
-uint32_t getBlockSize() {
- return __nvvm_read_ptx_sreg_ntid_x() -
- (!mapping::isSPMDMode() * mapping::getWarpSize());
+uint32_t getThreadIdInWarp() {
+ return impl::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
}
-uint32_t getKernelSize() { return __nvvm_read_ptx_sreg_nctaid_x(); }
+uint32_t getKernelSize() {
+ return __nvvm_read_ptx_sreg_nctaid_x() *
+ mapping::getNumberOfProcessorElements();
+}
uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
-uint32_t getNumberOfProcessorElements() {
- return __nvvm_read_ptx_sreg_ntid_x();
-}
-
uint32_t getWarpId() {
- return mapping::getThreadIdInBlock() / mapping::getWarpSize();
+ return impl::getThreadIdInBlock() / mapping::getWarpSize();
}
uint32_t getNumberOfWarpsInBlock() {
@@ -164,6 +158,10 @@ uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
} // namespace impl
} // namespace _OMP
+/// We have to be deliberate about the distinction of `mapping::` and `impl::`
+/// below to avoid repeating assumptions or including irrelevant ones.
+///{
+
static bool isInLastWarp() {
uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) &
~(mapping::getWarpSize() - 1);
@@ -200,30 +198,60 @@ LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
-uint32_t mapping::getThreadIdInWarp() { return impl::getThreadIdInWarp(); }
+uint32_t mapping::getThreadIdInWarp() {
+ uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
+ ASSERT(ThreadIdInWarp < impl::getWarpSize());
+ return ThreadIdInWarp;
+}
+
+uint32_t mapping::getThreadIdInBlock() {
+ uint32_t ThreadIdInBlock = impl::getThreadIdInBlock();
+ ASSERT(ThreadIdInBlock < impl::getNumHardwareThreadsInBlock());
+ return ThreadIdInBlock;
+}
-uint32_t mapping::getThreadIdInBlock() { return impl::getThreadIdInBlock(); }
+uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
-uint32_t mapping::getBlockSize() { return impl::getBlockSize(); }
+uint32_t mapping::getBlockSize() {
+ uint32_t BlockSize = mapping::getNumberOfProcessorElements() -
+ (!mapping::isSPMDMode() * impl::getWarpSize());
+ return BlockSize;
+}
uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
-uint32_t mapping::getBlockId() { return impl::getBlockId(); }
-
-uint32_t mapping::getNumberOfBlocks() { return impl::getNumberOfBlocks(); }
+uint32_t mapping::getWarpId() {
+ uint32_t WarpID = impl::getWarpId();
+ ASSERT(WarpID < impl::getNumberOfWarpsInBlock());
+ return WarpID;
+}
-uint32_t mapping::getNumberOfProcessorElements() {
- return impl::getNumberOfProcessorElements();
+uint32_t mapping::getBlockId() {
+ uint32_t BlockId = impl::getBlockId();
+ ASSERT(BlockId < impl::getNumberOfBlocks());
+ return BlockId;
}
-uint32_t mapping::getWarpId() { return impl::getWarpId(); }
+uint32_t mapping::getNumberOfWarpsInBlock() {
+ uint32_t NumberOfWarpsInBlocks = impl::getNumberOfWarpsInBlock();
+ ASSERT(impl::getWarpId() < NumberOfWarpsInBlocks);
+ return NumberOfWarpsInBlocks;
+}
-uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
+uint32_t mapping::getNumberOfBlocks() {
+ uint32_t NumberOfBlocks = impl::getNumberOfBlocks();
+ ASSERT(impl::getBlockId() < NumberOfBlocks);
+ return NumberOfBlocks;
+}
-uint32_t mapping::getNumberOfWarpsInBlock() {
- return impl::getNumberOfWarpsInBlock();
+uint32_t mapping::getNumberOfProcessorElements() {
+ uint32_t NumberOfProcessorElements = impl::getNumHardwareThreadsInBlock();
+ ASSERT(impl::getThreadIdInBlock() < NumberOfProcessorElements);
+ return NumberOfProcessorElements;
}
+///}
+
/// Execution mode
///
///{
@@ -247,7 +275,7 @@ __attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
FunctionTracingRAII();
- return mapping::getNumberOfProcessorElements();
+ return impl::getNumHardwareThreadsInBlock();
}
}
#pragma omp end declare target
More information about the Openmp-commits
mailing list