[Openmp-commits] [openmp] 197b7b2 - [NFC][libomptarget] move remaining device specific code out of omptarget-nvptx.h
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Fri Oct 25 10:58:38 PDT 2019
Author: Jon Chesterfield
Date: 2019-10-25T18:58:31+01:00
New Revision: 197b7b24c330a722cae76ded7c91f51d4d28192f
URL: https://github.com/llvm/llvm-project/commit/197b7b24c330a722cae76ded7c91f51d4d28192f
DIFF: https://github.com/llvm/llvm-project/commit/197b7b24c330a722cae76ded7c91f51d4d28192f.diff
LOG: [NFC][libomptarget] move remaining device specific code out of omptarget-nvptx.h
Summary:
[NFC][libomptarget] move remaining device specific code out of omptarget-nvptx.h
Strictly speaking, one difference with respect to amdgcn remains: parallelLevel
is volatile-qualified on amdgcn but not on nvptx. Determining whether this is
correct, and how to represent the different semantics of 'volatile' under
various conditions, is beyond the scope of this code-motion patch.
Reviewers: ABataev, jdoerfert, grokos
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D69424
Added:
Modified:
openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 70e6c286a187..336206aa9413 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -15,16 +15,12 @@
#define __OMPTARGET_NVPTX_H
// std includes
-#include <stdint.h>
-#include <stdlib.h>
-
#include <inttypes.h>
-
-// cuda includes
-#include <cuda.h>
#include <math.h>
+#include <stdlib.h>
// local includes
+#include "target_impl.h"
#include "debug.h" // debug
#include "interface.h" // interfaces with omp, compiler, and user
#include "option.h" // choices we have
@@ -86,20 +82,6 @@ class omptarget_nvptx_SharedArgs {
extern __device__ __shared__ omptarget_nvptx_SharedArgs
omptarget_nvptx_globalArgs;
-// Data sharing related quantities, need to match what is used in the compiler.
-enum DATA_SHARING_SIZES {
- // The maximum number of workers in a kernel.
- DS_Max_Worker_Threads = 992,
- // The size reserved for data in a shared memory slot.
- DS_Slot_Size = 256,
- // The slot size that should be reserved for a working warp.
- DS_Worker_Warp_Slot_Size = WARPSIZE * DS_Slot_Size,
- // The maximum number of warps in use
- DS_Max_Warp_Number = 32,
- // The size of the preallocated shared memory buffer per team
- DS_Shared_Memory_Size = 128,
-};
-
// Data structure to keep in shared memory that traces the current slot, stack,
// and frame pointer as well as the active threads that didn't exit the current
// environment.
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index de2776e91ecf..4e7dc4e72ceb 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -12,10 +12,25 @@
#ifndef _TARGET_IMPL_H_
#define _TARGET_IMPL_H_
+#include <cuda.h>
#include <stdint.h>
#include "option.h"
+// Data sharing related quantities, need to match what is used in the compiler.
+enum DATA_SHARING_SIZES {
+ // The maximum number of workers in a kernel.
+ DS_Max_Worker_Threads = 992,
+ // The size reserved for data in a shared memory slot.
+ DS_Slot_Size = 256,
+ // The slot size that should be reserved for a working warp.
+ DS_Worker_Warp_Slot_Size = WARPSIZE * DS_Slot_Size,
+ // The maximum number of warps in use
+ DS_Max_Warp_Number = 32,
+ // The size of the preallocated shared memory buffer per team
+ DS_Shared_Memory_Size = 128,
+};
+
INLINE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
}
More information about the Openmp-commits
mailing list