[Openmp-commits] [openmp] ad4c426 - [nfc][libomptarget] Inline option into target_impl

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Sun Oct 27 07:27:03 PDT 2019


Author: Jon Chesterfield
Date: 2019-10-27T14:26:55Z
New Revision: ad4c42666dd8ace31d0283765b75ef9146f2179b

URL: https://github.com/llvm/llvm-project/commit/ad4c42666dd8ace31d0283765b75ef9146f2179b
DIFF: https://github.com/llvm/llvm-project/commit/ad4c42666dd8ace31d0283765b75ef9146f2179b.diff

LOG: [nfc][libomptarget] Inline option into target_impl

Summary:
[nfc][libomptarget] Inline option into target_impl

Subset of D69423. The macros that were in option.h are all target-dependent.
Inlining the header into target_impl.h simplifies the dependency graph when
looking to move code into a common subdirectory.

Reviewers: ABataev, jdoerfert, grokos

Subscribers: openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D69472

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
    openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
    openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Removed: 
    openmp/libomptarget/deviceRTLs/nvptx/src/option.h


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
index 123c15161ca4..a2f1d37fdad2 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
@@ -126,7 +126,7 @@
 
 #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
 #include <stdio.h>
-#include "option.h"
+#include "target_impl.h"
 
 template <typename... Arguments>
 NOINLINE static void log(const char *fmt, Arguments... parameters) {

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 336206aa9413..5006aa4a6cce 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -23,7 +23,6 @@
 #include "target_impl.h"
 #include "debug.h"     // debug
 #include "interface.h" // interfaces with omp, compiler, and user
-#include "option.h"    // choices we have
 #include "state-queue.h"
 #include "support.h"
 

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h b/openmp/libomptarget/deviceRTLs/nvptx/src/option.h
deleted file mode 100644
index 37d1134f44bc..000000000000
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===------------ option.h - NVPTX OpenMP GPU options ------------ CUDA -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// GPU default options
-//
-//===----------------------------------------------------------------------===//
-#ifndef _OPTION_H_
-#define _OPTION_H_
-
-#include "interface.h"
-
-////////////////////////////////////////////////////////////////////////////////
-// Kernel options
-////////////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////////////
-// The following def must match the absolute limit hardwired in the host RTL
-// max number of threads per team
-#define MAX_THREADS_PER_TEAM 1024
-
-#define WARPSIZE 32
-
-// The named barrier for active parallel threads of a team in an L1 parallel
-// region to synchronize with each other.
-#define L1_BARRIER (1)
-
-// Maximum number of preallocated arguments to an outlined parallel/simd function.
-// Anything more requires dynamic memory allocation.
-#define MAX_SHARED_ARGS 20
-
-// Maximum number of omp state objects per SM allocated statically in global
-// memory.
-#if __CUDA_ARCH__ >= 700
-#define OMP_STATE_COUNT 32
-#define MAX_SM 84
-#elif __CUDA_ARCH__ >= 600
-#define OMP_STATE_COUNT 32
-#define MAX_SM 56
-#else
-#define OMP_STATE_COUNT 16
-#define MAX_SM 16
-#endif
-
-#define OMP_ACTIVE_PARALLEL_LEVEL 128
-
-////////////////////////////////////////////////////////////////////////////////
-// algo options
-////////////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////////////
-// misc options (by def everythig here is device)
-////////////////////////////////////////////////////////////////////////////////
-
-#define INLINE __forceinline__ __device__
-#define NOINLINE __noinline__ __device__
-
-#endif

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h b/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h
index 9d7576bcd76e..8320929cfaf3 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h
@@ -21,7 +21,7 @@
 
 #include <stdint.h>
 
-#include "option.h" // choices we have
+#include "target_impl.h"
 
 template <typename ElementType, uint32_t SIZE> class omptarget_nvptx_Queue {
 private:

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 4e7dc4e72ceb..1a5d69a3ad57 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -15,7 +15,42 @@
 #include <cuda.h>
 #include <stdint.h>
 
-#include "option.h"
+#define INLINE __forceinline__ __device__
+#define NOINLINE __noinline__ __device__
+
+////////////////////////////////////////////////////////////////////////////////
+// Kernel options
+////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////
+// The following def must match the absolute limit hardwired in the host RTL
+// max number of threads per team
+#define MAX_THREADS_PER_TEAM 1024
+
+#define WARPSIZE 32
+
+// The named barrier for active parallel threads of a team in an L1 parallel
+// region to synchronize with each other.
+#define L1_BARRIER (1)
+
+// Maximum number of preallocated arguments to an outlined parallel/simd function.
+// Anything more requires dynamic memory allocation.
+#define MAX_SHARED_ARGS 20
+
+// Maximum number of omp state objects per SM allocated statically in global
+// memory.
+#if __CUDA_ARCH__ >= 700
+#define OMP_STATE_COUNT 32
+#define MAX_SM 84
+#elif __CUDA_ARCH__ >= 600
+#define OMP_STATE_COUNT 32
+#define MAX_SM 56
+#else
+#define OMP_STATE_COUNT 16
+#define MAX_SM 16
+#endif
+
+#define OMP_ACTIVE_PARALLEL_LEVEL 128
 
 // Data sharing related quantities, need to match what is used in the compiler.
 enum DATA_SHARING_SIZES {


        


More information about the Openmp-commits mailing list