[Openmp-commits] [PATCH] D69476: [nfc][libomptarget] Warp size aware logging. Last part of D69423

Jon Chesterfield via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Sun Oct 27 08:48:40 PDT 2019


JonChesterfield updated this revision to Diff 226575.
JonChesterfield added a comment.

- Use cuda builtin_var abstraction everywhere


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69476/new/

https://reviews.llvm.org/D69476

Files:
  openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
  openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h


Index: openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -104,9 +104,9 @@
 
 INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
 
-INLINE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
+INLINE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
 
-INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
+INLINE unsigned GetLaneId() { return GetThreadIdInBlock() % WARPSIZE; }
 
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -122,7 +122,9 @@
 //      If NumThreads is 1024, master id is 992.
 //
 // Called in Generic Execution Mode only.
-INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
+INLINE int GetMasterThreadID() {
+  return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1);
+}
 
 // The last warp is reserved for the master; other warps are workers.
 // Called in Generic Execution Mode only.
Index: openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -364,7 +364,7 @@
     for (;;) {
       now = clock();
       clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
-      if (cycles >= __OMP_SPIN * blockIdx.x) {
+      if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
         break;
       }
     }
Index: openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
@@ -128,12 +128,12 @@
 
 #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
 #include <stdio.h>
-#include "target_impl.h"
+#include "support.h"
 
 template <typename... Arguments>
 NOINLINE static void log(const char *fmt, Arguments... parameters) {
-  printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE),
-         (int)(threadIdx.x & 0x1F), parameters...);
+  printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
+         (int)(GetWarpId()), (int)(GetLaneId()), parameters...);
 }
 
 #endif
@@ -144,9 +144,8 @@
 NOINLINE static void check(bool cond, const char *fmt,
                            Arguments... parameters) {
   if (!cond)
-    printf(fmt, (int)blockIdx.x, (int)threadIdx.x,
-           (int)(threadIdx.x / WARPSIZE), (int)(threadIdx.x & 0x1F),
-           parameters...);
+    printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
+           (int)(GetWarpId()), (int)(GetLaneId()), parameters...);
   assert(cond);
 }
 
Index: openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -11,13 +11,9 @@
 //===----------------------------------------------------------------------===//
 #include "omptarget-nvptx.h"
 #include "target_impl.h"
+#include "support.h"
 #include <stdio.h>
 
-// Warp ID in the CUDA block
-INLINE static unsigned getWarpId() { return threadIdx.x / WARPSIZE; }
-// Lane ID in the CUDA warp.
-INLINE static unsigned getLaneId() { return threadIdx.x % WARPSIZE; }
-
 // Return true if this is the first active thread in the warp.
 INLINE static bool IsWarpMasterActiveThread() {
   unsigned long long Mask = __kmpc_impl_activemask();
@@ -358,7 +354,7 @@
   // This function initializes the stack pointer with the pointer to the
   // statically allocated shared memory slots. The size of a shared memory
   // slot is pre-determined to be 256 bytes.
-  if (threadIdx.x == 0)
+  if (GetThreadIdInBlock() == 0)
     data_sharing_init_stack_common();
 
   __threadfence_block();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D69476.226575.patch
Type: text/x-patch
Size: 4036 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20191027/2b1907f0/attachment-0001.bin>


More information about the Openmp-commits mailing list