[Openmp-commits] [openmp] 8adae60 - [libomptarget][nfc] Extract function from data_sharing, move to common

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Wed Dec 18 11:39:48 PST 2019


Author: JonChesterfield
Date: 2019-12-18T19:39:35Z
New Revision: 8adae6027c0813df935e4f96067bab0051974910

URL: https://github.com/llvm/llvm-project/commit/8adae6027c0813df935e4f96067bab0051974910
DIFF: https://github.com/llvm/llvm-project/commit/8adae6027c0813df935e4f96067bab0051974910.diff

LOG: [libomptarget][nfc] Extract function from data_sharing, move to common

Summary:
[libomptarget][nfc] Extract function from data_sharing, move to common

Finding the first active thread in the warp is different on nvptx and amdgcn,
mostly due to warp size and the desire for efficiency.

Reviewers: ABataev, jdoerfert, grokos

Reviewed By: jdoerfert

Subscribers: jvesely, mgorny, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D71643

Added: 
    openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu

Modified: 
    openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
    openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Removed: 
    openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu


################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 9eadbdb1e248..ebea0a049b6e 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -57,12 +57,13 @@ get_filename_component(devicertl_base_directory
 set(cuda_sources
   ${devicertl_base_directory}/common/src/cancel.cu
   ${devicertl_base_directory}/common/src/critical.cu
-  ${devicertl_base_directory}/common/src/loop.cu
+  ${devicertl_base_directory}/common/src/data_sharing.cu
   ${devicertl_base_directory}/common/src/libcall.cu
-  ${devicertl_base_directory}/common/src/reduction.cu
+  ${devicertl_base_directory}/common/src/loop.cu
   ${devicertl_base_directory}/common/src/omp_data.cu
   ${devicertl_base_directory}/common/src/omptarget.cu
   ${devicertl_base_directory}/common/src/parallel.cu
+  ${devicertl_base_directory}/common/src/reduction.cu
   ${devicertl_base_directory}/common/src/sync.cu
   ${devicertl_base_directory}/common/src/task.cu)
 

diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index 713a880d9a5c..40bbf943aef5 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -101,6 +101,8 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
   return __lanemask_gt();
 }
 
+EXTERN bool __kmpc_impl_is_first_active_thread();
+
 INLINE uint32_t __kmpc_impl_smid() {
   return __smid();
 }

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
similarity index 97%
rename from openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
rename to openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
index 6549d76def7c..c259c7707895 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
@@ -1,4 +1,4 @@
-//===----- data_sharing.cu - NVPTX OpenMP debug utilities -------- CUDA -*-===//
+//===----- data_sharing.cu - OpenMP GPU data sharing ------------- CUDA -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,21 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the implementation of data sharing environments/
+// This file contains the implementation of data sharing environments
 //
 //===----------------------------------------------------------------------===//
 #include "common/omptarget.h"
 #include "target_impl.h"
 #include <stdio.h>
 
-// Return true if this is the first active thread in the warp.
-INLINE static bool IsWarpMasterActiveThread() {
-  unsigned long long Mask = __kmpc_impl_activemask();
-  unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
-  unsigned long long Sh = Mask << ShNum;
-  // Truncate Sh to the 32 lower bits
-  return (unsigned)Sh == 0;
-}
 // Return true if this is the master thread.
 INLINE static bool IsMasterThread(bool isSPMDExecutionMode) {
   return !isSPMDExecutionMode && GetMasterThreadID() == GetThreadIdInBlock();
@@ -128,7 +120,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
   DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
 
   // Only the warp active master needs to grow the stack.
-  if (IsWarpMasterActiveThread()) {
+  if (__kmpc_impl_is_first_active_thread()) {
     // Save the current active threads.
     ActiveT = CurActiveThreads;
 
@@ -229,7 +221,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
   unsigned WID = GetWarpId();
 
   if (IsEntryPoint) {
-    if (IsWarpMasterActiveThread()) {
+    if (__kmpc_impl_is_first_active_thread()) {
       DSPRINT0(DSFLAG, "Doing clean up\n");
 
       // The master thread cleans the saved slot, because this is an environment
@@ -255,7 +247,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
   // warp diverged and returns in different places). This only works if we
   // assume that threads will converge right after the call site that started
   // the environment.
-  if (IsWarpMasterActiveThread()) {
+  if (__kmpc_impl_is_first_active_thread()) {
     __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
 
     DSPRINT0(DSFLAG, "Before restoring the stack\n");

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index d38d766a7803..84b52f55b73d 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -53,9 +53,8 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
   set(cuda_src_files
       ${devicertl_common_directory}/src/cancel.cu
       ${devicertl_common_directory}/src/critical.cu
-      src/data_sharing.cu
+      ${devicertl_common_directory}/src/data_sharing.cu
       ${devicertl_common_directory}/src/libcall.cu
-      src/target_impl.cu
       ${devicertl_common_directory}/src/loop.cu
       ${devicertl_common_directory}/src/omptarget.cu
       ${devicertl_common_directory}/src/parallel.cu
@@ -63,6 +62,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
       ${devicertl_common_directory}/src/support.cu
       ${devicertl_common_directory}/src/sync.cu
       ${devicertl_common_directory}/src/task.cu
+      src/target_impl.cu
   )
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 350d2cf5f2e1..6f6c38956a94 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -94,6 +94,15 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
   return res;
 }
 
+// Return true if this is the first active thread in the warp.
+INLINE bool __kmpc_impl_is_first_active_thread() {
+  unsigned long long Mask = __kmpc_impl_activemask();
+  unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
+  unsigned long long Sh = Mask << ShNum;
+  // Truncate Sh to the 32 lower bits
+  return (unsigned)Sh == 0;
+}
+
 INLINE uint32_t __kmpc_impl_smid() {
   uint32_t id;
   asm("mov.u32 %0, %%smid;" : "=r"(id));


        


More information about the Openmp-commits mailing list