[clang] b5667d0 - [OpenMP][CUDA] Fix std::complex in GPU regions

Johannes Doerfert via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 10 22:42:32 PDT 2020


Author: Johannes Doerfert
Date: 2020-07-11T00:40:05-05:00
New Revision: b5667d00e0447747419a783697b84a37f59ce055

URL: https://github.com/llvm/llvm-project/commit/b5667d00e0447747419a783697b84a37f59ce055
DIFF: https://github.com/llvm/llvm-project/commit/b5667d00e0447747419a783697b84a37f59ce055.diff

LOG: [OpenMP][CUDA] Fix std::complex in GPU regions

The old way worked to some degree for C++-mode but in C mode we actually
tried to introduce variants of macros (e.g., isinf). To make both modes
work reliably we get rid of those extra variants and directly use NVIDIA
intrinsics in the complex implementation. While this has to be revisited
as we add other GPU targets which want to reuse the code, it should be
fine for now.

Reviewed By: tra, JonChesterfield, yaxunl

Differential Revision: https://reviews.llvm.org/D83591

Added: 
    

Modified: 
    clang/lib/Headers/__clang_cuda_complex_builtins.h
    clang/lib/Headers/__clang_cuda_math.h
    clang/test/Headers/nvptx_device_math_complex.c
    clang/test/Headers/nvptx_device_math_complex.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
index c48c754ed1a4..8c10ff6b461f 100644
--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -23,20 +23,16 @@
 #define __DEVICE__ __device__ inline
 #endif
 
-// Make the algorithms available for C and C++ by selecting the right functions.
-#if defined(__cplusplus)
-// TODO: In OpenMP mode we cannot overload isinf/isnan/isfinite the way we
-// overload all other math functions because old math system headers and not
-// always conformant and return an integer instead of a boolean. Until that has
-// been addressed we need to work around it. For now, we substituate with the
-// calls we would have used to implement those three functions. Note that we
-// could use the C alternatives as well.
-#define _ISNANd ::__isnan
-#define _ISNANf ::__isnanf
-#define _ISINFd ::__isinf
-#define _ISINFf ::__isinff
-#define _ISFINITEd ::__isfinited
-#define _ISFINITEf ::__finitef
+// To make the algorithms available for C and C++ in CUDA and OpenMP we select
+// 
diff erent but equivalent function versions. TODO: For OpenMP we currently
+// select the native builtins as the overload support for templates is lacking.
+#if !defined(_OPENMP)
+#define _ISNANd std::isnan
+#define _ISNANf std::isnan
+#define _ISINFd std::isinf
+#define _ISINFf std::isinf
+#define _ISFINITEd std::isfinite
+#define _ISFINITEf std::isfinite
 #define _COPYSIGNd std::copysign
 #define _COPYSIGNf std::copysign
 #define _SCALBNd std::scalbn
@@ -46,20 +42,20 @@
 #define _LOGBd std::logb
 #define _LOGBf std::logb
 #else
-#define _ISNANd isnan
-#define _ISNANf isnanf
-#define _ISINFd isinf
-#define _ISINFf isinff
-#define _ISFINITEd isfinite
-#define _ISFINITEf isfinitef
-#define _COPYSIGNd copysign
-#define _COPYSIGNf copysignf
-#define _SCALBNd scalbn
-#define _SCALBNf scalbnf
-#define _ABSd abs
-#define _ABSf absf
-#define _LOGBd logb
-#define _LOGBf logbf
+#define _ISNANd __nv_isnand
+#define _ISNANf __nv_isnanf
+#define _ISINFd __nv_isinfd
+#define _ISINFf __nv_isinff
+#define _ISFINITEd __nv_isfinited
+#define _ISFINITEf __nv_finitef
+#define _COPYSIGNd __nv_copysign
+#define _COPYSIGNf __nv_copysignf
+#define _SCALBNd __nv_scalbn
+#define _SCALBNf __nv_scalbnf
+#define _ABSd __nv_fabs
+#define _ABSf __nv_fabsf
+#define _LOGBd __nv_logb
+#define _LOGBf __nv_logbf
 #endif
 
 #if defined(__cplusplus)

diff  --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h
index 2e8e6ae71d9c..332e616702ac 100644
--- a/clang/lib/Headers/__clang_cuda_math.h
+++ b/clang/lib/Headers/__clang_cuda_math.h
@@ -340,16 +340,6 @@ __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }
 __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
 __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 
-// In C++ mode OpenMP takes the system versions of these because some math
-// headers provide the wrong return type. This cannot happen in C and we can and
-// want to use the specialized versions right away.
-#if defined(_OPENMP) && !defined(__cplusplus)
-__DEVICE__ int isinff(float __x) { return __nv_isinff(__x); }
-__DEVICE__ int isinf(double __x) { return __nv_isinfd(__x); }
-__DEVICE__ int isnanf(float __x) { return __nv_isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return __nv_isnand(__x); }
-#endif
-
 #pragma pop_macro("__DEVICE__")
 #pragma pop_macro("__DEVICE_VOID__")
 #pragma pop_macro("__FAST_OR_SLOW")

diff  --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c
index 0e212592dd2b..6e3e8bffbd24 100644
--- a/clang/test/Headers/nvptx_device_math_complex.c
+++ b/clang/test/Headers/nvptx_device_math_complex.c
@@ -11,12 +11,34 @@
 #include <complex.h>
 #endif
 
-// CHECK-DAG: define weak {{.*}} @__mulsc3
-// CHECK-DAG: define weak {{.*}} @__muldc3
-// CHECK-DAG: define weak {{.*}} @__divsc3
-// CHECK-DAG: define weak {{.*}} @__divdc3
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call double @__nv_copysign(
 
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call float @__nv_copysignf(
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call i32 @__nv_isfinited(
+// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call double @__nv_scalbn(
+// CHECK-DAG: call double @__nv_fabs(
+// CHECK-DAG: call double @__nv_logb(
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call i32 @__nv_finitef(
+// CHECK-DAG: call float @__nv_copysignf(
 // CHECK-DAG: call float @__nv_scalbnf(
+// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @__nv_logbf(
+
 void test_scmplx(float _Complex a) {
 #pragma omp target
   {
@@ -24,7 +46,6 @@ void test_scmplx(float _Complex a) {
   }
 }
 
-// CHECK-DAG: call double @__nv_scalbn(
 void test_dcmplx(double _Complex a) {
 #pragma omp target
   {

diff  --git a/clang/test/Headers/nvptx_device_math_complex.cpp b/clang/test/Headers/nvptx_device_math_complex.cpp
index 58ed24b74b0e..e4b78deb05d7 100644
--- a/clang/test/Headers/nvptx_device_math_complex.cpp
+++ b/clang/test/Headers/nvptx_device_math_complex.cpp
@@ -5,12 +5,34 @@
 
 #include <complex>
 
-// CHECK-DAG: define weak {{.*}} @__mulsc3
-// CHECK-DAG: define weak {{.*}} @__muldc3
-// CHECK-DAG: define weak {{.*}} @__divsc3
-// CHECK-DAG: define weak {{.*}} @__divdc3
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call double @__nv_copysign(
 
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call float @__nv_copysignf(
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call i32 @__nv_isfinited(
+// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call double @__nv_scalbn(
+// CHECK-DAG: call double @__nv_fabs(
+// CHECK-DAG: call double @__nv_logb(
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call i32 @__nv_finitef(
+// CHECK-DAG: call float @__nv_copysignf(
 // CHECK-DAG: call float @__nv_scalbnf(
+// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @__nv_logbf(
+
 void test_scmplx(std::complex<float> a) {
 #pragma omp target
   {
@@ -18,7 +40,6 @@ void test_scmplx(std::complex<float> a) {
   }
 }
 
-// CHECK-DAG: call double @__nv_scalbn(
 void test_dcmplx(std::complex<double> a) {
 #pragma omp target
   {


        


More information about the cfe-commits mailing list