[Openmp-commits] [openmp] 93bebdc - [OpenMP][NFCI] Cleanup new device RT mapping interface

Johannes Doerfert via Openmp-commits openmp-commits at lists.llvm.org
Thu Nov 4 15:55:17 PDT 2021


Author: Johannes Doerfert
Date: 2021-11-04T17:54:53-05:00
New Revision: 93bebdc78f12459a85eb94eccbaf50044ac4bd5d

URL: https://github.com/llvm/llvm-project/commit/93bebdc78f12459a85eb94eccbaf50044ac4bd5d
DIFF: https://github.com/llvm/llvm-project/commit/93bebdc78f12459a85eb94eccbaf50044ac4bd5d.diff

LOG: [OpenMP][NFCI] Cleanup new device RT mapping interface

Minimize the `impl` interface and clean up some uses of mapping
functions.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D112154

Added: 
    

Modified: 
    openmp/libomptarget/DeviceRTL/src/Mapping.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index be937d1ca69e..bece29489a6d 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -10,6 +10,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Mapping.h"
+#include "Interface.h"
 #include "State.h"
 #include "Types.h"
 #include "Utils.h"
@@ -43,6 +44,12 @@ uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
   return (r < group_size) ? r : group_size;
 }
 
+uint32_t getNumHardwareThreadsInBlock() {
+  return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
+                         __builtin_amdgcn_grid_size_x(),
+                         __builtin_amdgcn_workgroup_size_x());
+}
+
 LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
 
 LaneMaskTy lanemaskLT() {
@@ -67,13 +74,6 @@ uint32_t getThreadIdInWarp() {
 
 uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
 
-uint32_t getBlockSize() {
-  // TODO: verify this logic for generic mode.
-  return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
-                         __builtin_amdgcn_grid_size_x(),
-                         __builtin_amdgcn_workgroup_size_x());
-}
-
 uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
 
 uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
@@ -83,12 +83,8 @@ uint32_t getNumberOfBlocks() {
                     __builtin_amdgcn_workgroup_size_x());
 }
 
-uint32_t getNumberOfProcessorElements() {
-  return getBlockSize();
-}
-
 uint32_t getWarpId() {
-  return mapping::getThreadIdInBlock() / mapping::getWarpSize();
+  return impl::getThreadIdInBlock() / mapping::getWarpSize();
 }
 
 uint32_t getNumberOfWarpsInBlock() {
@@ -104,6 +100,10 @@ uint32_t getNumberOfWarpsInBlock() {
 #pragma omp begin declare variant match(                                       \
     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
 
+uint32_t getNumHardwareThreadsInBlock() {
+  return __nvvm_read_ptx_sreg_ntid_x();
+}
+
 constexpr const llvm::omp::GV &getGridValue() {
   return llvm::omp::NVPTXGridValues;
 }
@@ -126,29 +126,23 @@ LaneMaskTy lanemaskGT() {
   return Res;
 }
 
-uint32_t getThreadIdInWarp() {
-  return mapping::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
-}
-
 uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
 
-uint32_t getBlockSize() {
-  return __nvvm_read_ptx_sreg_ntid_x() -
-         (!mapping::isSPMDMode() * mapping::getWarpSize());
+uint32_t getThreadIdInWarp() {
+  return impl::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
 }
 
-uint32_t getKernelSize() { return __nvvm_read_ptx_sreg_nctaid_x(); }
+uint32_t getKernelSize() {
+  return __nvvm_read_ptx_sreg_nctaid_x() *
+         mapping::getNumberOfProcessorElements();
+}
 
 uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
 
 uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
 
-uint32_t getNumberOfProcessorElements() {
-  return __nvvm_read_ptx_sreg_ntid_x();
-}
-
 uint32_t getWarpId() {
-  return mapping::getThreadIdInBlock() / mapping::getWarpSize();
+  return impl::getThreadIdInBlock() / mapping::getWarpSize();
 }
 
 uint32_t getNumberOfWarpsInBlock() {
@@ -164,6 +158,10 @@ uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
 } // namespace impl
 } // namespace _OMP
 
+/// We have to be deliberate about the distinction of `mapping::` and `impl::`
+/// below to avoid repeating assumptions or including irrelevant ones.
+///{
+
 static bool isInLastWarp() {
   uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) &
                      ~(mapping::getWarpSize() - 1);
@@ -200,30 +198,60 @@ LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
 
 LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
 
-uint32_t mapping::getThreadIdInWarp() { return impl::getThreadIdInWarp(); }
+uint32_t mapping::getThreadIdInWarp() {
+  uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
+  ASSERT(ThreadIdInWarp < impl::getWarpSize());
+  return ThreadIdInWarp;
+}
+
+uint32_t mapping::getThreadIdInBlock() {
+  uint32_t ThreadIdInBlock = impl::getThreadIdInBlock();
+  ASSERT(ThreadIdInBlock < impl::getNumHardwareThreadsInBlock());
+  return ThreadIdInBlock;
+}
 
-uint32_t mapping::getThreadIdInBlock() { return impl::getThreadIdInBlock(); }
+uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
 
-uint32_t mapping::getBlockSize() { return impl::getBlockSize(); }
+uint32_t mapping::getBlockSize() {
+  uint32_t BlockSize = mapping::getNumberOfProcessorElements() -
+                       (!mapping::isSPMDMode() * impl::getWarpSize());
+  return BlockSize;
+}
 
 uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
 
-uint32_t mapping::getBlockId() { return impl::getBlockId(); }
-
-uint32_t mapping::getNumberOfBlocks() { return impl::getNumberOfBlocks(); }
+uint32_t mapping::getWarpId() {
+  uint32_t WarpID = impl::getWarpId();
+  ASSERT(WarpID < impl::getNumberOfWarpsInBlock());
+  return WarpID;
+}
 
-uint32_t mapping::getNumberOfProcessorElements() {
-  return impl::getNumberOfProcessorElements();
+uint32_t mapping::getBlockId() {
+  uint32_t BlockId = impl::getBlockId();
+  ASSERT(BlockId < impl::getNumberOfBlocks());
+  return BlockId;
 }
 
-uint32_t mapping::getWarpId() { return impl::getWarpId(); }
+uint32_t mapping::getNumberOfWarpsInBlock() {
+  uint32_t NumberOfWarpsInBlocks = impl::getNumberOfWarpsInBlock();
+  ASSERT(impl::getWarpId() < NumberOfWarpsInBlocks);
+  return NumberOfWarpsInBlocks;
+}
 
-uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
+uint32_t mapping::getNumberOfBlocks() {
+  uint32_t NumberOfBlocks = impl::getNumberOfBlocks();
+  ASSERT(impl::getBlockId() < NumberOfBlocks);
+  return NumberOfBlocks;
+}
 
-uint32_t mapping::getNumberOfWarpsInBlock() {
-  return impl::getNumberOfWarpsInBlock();
+uint32_t mapping::getNumberOfProcessorElements() {
+  uint32_t NumberOfProcessorElements = impl::getNumHardwareThreadsInBlock();
+  ASSERT(impl::getThreadIdInBlock() < NumberOfProcessorElements);
+  return NumberOfProcessorElements;
 }
 
+///}
+
 /// Execution mode
 ///
 ///{
@@ -247,7 +275,7 @@ __attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
 
 __attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
   FunctionTracingRAII();
-  return mapping::getNumberOfProcessorElements();
+  return impl::getNumHardwareThreadsInBlock();
 }
 }
 #pragma omp end declare target


        


More information about the Openmp-commits mailing list