[clang] 7f1e6fc - [OpenMP] Use __OPENMP_NVPTX__ instead of _OPENMP in wrapper headers

Fri Jul 10 16:55:25 PDT 2020

Author: Johannes Doerfert
Date: 2020-07-10T18:53:34-05:00
New Revision: 7f1e6fcff9427adfa8efa3bfeeeac801da788b87

URL: https://github.com/llvm/llvm-project/commit/7f1e6fcff9427adfa8efa3bfeeeac801da788b87
DIFF: https://github.com/llvm/llvm-project/commit/7f1e6fcff9427adfa8efa3bfeeeac801da788b87.diff

LOG: [OpenMP] Use __OPENMP_NVPTX__ instead of _OPENMP in wrapper headers

Due to recent changes we cannot use OpenMP in CUDA files anymore
(PR45533) as the math handling of CUDA is different when _OPENMP is
defined. We actually want this different behavior only if we are
offloading with OpenMP to NVIDIA, thus generating NVPTX. With this patch
we do not interfere with the CUDA math handling except if we are in
NVPTX offloading mode, as indicated by the presence of __OPENMP_NVPTX__.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D78155

Added: 
    

Modified: 
    clang/lib/Headers/__clang_cuda_cmath.h
    clang/lib/Headers/__clang_cuda_device_functions.h
    clang/lib/Headers/__clang_cuda_libdevice_declares.h
    clang/lib/Headers/__clang_cuda_math.h
    clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
    clang/lib/Headers/openmp_wrappers/cmath
    clang/lib/Headers/openmp_wrappers/math.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h
index f406112164e5..8ba182689a4f 100644

--- a/clang/lib/Headers/__clang_cuda_cmath.h
+++ b/clang/lib/Headers/__clang_cuda_cmath.h
@@ -12,7 +12,7 @@
 #error "This file is for CUDA compilation only."
 #endif
 
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 #include <limits>
 #endif
 
@@ -32,7 +32,7 @@
 // implementation.  Declaring in the global namespace and pulling into namespace
 // std covers all of the known knowns.
 
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
 #else
 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
@@ -69,7 +69,7 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
 // Windows. For OpenMP we omit these as some old system headers have
 // non-conforming `isinf(float)` and `isnan(float)` implementations that return
 // an `int`. The system versions of these functions should be fine anyway.
-#if !defined(_MSC_VER) && !defined(_OPENMP)
+#if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__)
 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
@@ -146,7 +146,7 @@ __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
 // libdevice doesn't provide an implementation, and we don't want to be in the
 // business of implementing tricky libm functions in this header.
 
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 
 // Now we've defined everything we promised we'd define in
 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
@@ -463,7 +463,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
 #endif
 
-#endif // _OPENMP
+#endif // __OPENMP_NVPTX__
 
 #undef __DEVICE__
 

diff  --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h
index 76c588997f18..f801e5426aa4 100644
--- a/clang/lib/Headers/__clang_cuda_device_functions.h
+++ b/clang/lib/Headers/__clang_cuda_device_functions.h
@@ -10,7 +10,7 @@
 #ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
 #define __CLANG_CUDA_DEVICE_FUNCTIONS_H__
 
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 #if CUDA_VERSION < 9000
 #error This file is intended to be used with CUDA-9+ only.
 #endif
@@ -20,7 +20,7 @@
 // we implement in this file. We need static in order to avoid emitting unused
 // functions and __forceinline__ helps inlining these wrappers at -O1.
 #pragma push_macro("__DEVICE__")
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
 #define __DEVICE__ static __attribute__((always_inline, nothrow))
 #else
 #define __DEVICE__ static __device__ __forceinline__
@@ -1466,14 +1466,14 @@ __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
 
 // For OpenMP we require the user to include <time.h> as we need to know what
 // clock_t is on the system.
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 __DEVICE__ /* clock_t= */ int clock() { return __nvvm_read_ptx_sreg_clock(); }
 #endif
 __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
 
 // These functions shouldn't be declared when including this header
 // for math function resolution purposes.
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 __DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
   return __builtin_memcpy(__a, __b, __c);
 }

diff  --git a/clang/lib/Headers/__clang_cuda_libdevice_declares.h b/clang/lib/Headers/__clang_cuda_libdevice_declares.h
index 4d70353394c8..6173b589e3ef 100644
--- a/clang/lib/Headers/__clang_cuda_libdevice_declares.h
+++ b/clang/lib/Headers/__clang_cuda_libdevice_declares.h
@@ -14,7 +14,7 @@
 extern "C" {
 #endif
 
-#if defined(_OPENMP)
+#if defined(__OPENMP_NVPTX__)
 #define __DEVICE__
 #elif defined(__CUDA__)
 #define __DEVICE__ __device__

diff  --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h
index 939c71a731e5..2e8e6ae71d9c 100644
--- a/clang/lib/Headers/__clang_cuda_math.h
+++ b/clang/lib/Headers/__clang_cuda_math.h
@@ -12,7 +12,7 @@
 #error "This file is for CUDA compilation only."
 #endif
 
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 #if CUDA_VERSION < 9000
 #error This file is intended to be used with CUDA-9+ only.
 #endif
@@ -22,7 +22,7 @@
 // we implement in this file. We need static in order to avoid emitting unused
 // functions and __forceinline__ helps inlining these wrappers at -O1.
 #pragma push_macro("__DEVICE__")
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
 #if defined(__cplusplus)
 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
 #else
@@ -36,7 +36,7 @@
 // because the OpenMP overlay requires constexpr functions here but prior to
 // c++14 void return functions could not be constexpr.
 #pragma push_macro("__DEVICE_VOID__")
-#ifdef _OPENMP && defined(__cplusplus) && __cplusplus < 201402L
+#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L
 #define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
 #else
 #define __DEVICE_VOID__ __DEVICE__

diff  --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
index 9ff0a186273a..406c9748e286 100644
--- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -22,11 +22,15 @@ extern "C" {
 #endif
 
 #define __CUDA__
+#define __OPENMP_NVPTX__
+
 /// Include declarations for libdevice functions.
 #include <__clang_cuda_libdevice_declares.h>
 
 /// Provide definitions for these functions.
 #include <__clang_cuda_device_functions.h>
+
+#undef __OPENMP_NVPTX__
 #undef __CUDA__
 
 #ifdef __cplusplus

diff  --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath
index 05be252fa9fb..bd6011eb6f6d 100644
--- a/clang/lib/Headers/openmp_wrappers/cmath
+++ b/clang/lib/Headers/openmp_wrappers/cmath
@@ -28,7 +28,9 @@
     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
 
 #define __CUDA__
+#define __OPENMP_NVPTX__
 #include <__clang_cuda_cmath.h>
+#undef __OPENMP_NVPTX__
 #undef __CUDA__
 
 // Overloads not provided by the CUDA wrappers but by the CUDA system headers.

diff  --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h
index e917a149b5c9..c64af8b13ece 100644
--- a/clang/lib/Headers/openmp_wrappers/math.h
+++ b/clang/lib/Headers/openmp_wrappers/math.h
@@ -41,7 +41,9 @@
     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
 
 #define __CUDA__
+#define __OPENMP_NVPTX__
 #include <__clang_cuda_math.h>
+#undef __OPENMP_NVPTX__
 #undef __CUDA__
 
 #pragma omp end declare variant