[clang] 9765220 - [OpenMP] Overload `std::isnan` and friends multiple times for the GPU

Wed Sep 16 11:40:04 PDT 2020

Author: Johannes Doerfert
Date: 2020-09-16T13:37:09-05:00
New Revision: 97652202d1e6964d5d7a1c03a257452c7ad95233

URL: https://github.com/llvm/llvm-project/commit/97652202d1e6964d5d7a1c03a257452c7ad95233
DIFF: https://github.com/llvm/llvm-project/commit/97652202d1e6964d5d7a1c03a257452c7ad95233.diff

LOG: [OpenMP] Overload `std::isnan` and friends multiple times for the GPU

`std::isnan` and friends can be found in two variants in the wild, one
returns `bool`, as the standard defines it, one returns `int`, as the C
macros do. So far we kinda hoped the system versions of these functions
will work for people, e.g. they are definitions that can be compiled for
the target. We know that is not the case always so we leverage the
`disable_implicit_base` OpenMP context extension to specialize both
versions of these functions without causing an invalid redeclaration.

Reviewed By: JonChesterfield, tra

Differential Revision: https://reviews.llvm.org/D85879

Added: 
    clang/test/Headers/openmp_device_math_isnan.cpp

Modified: 
    clang/lib/Headers/__clang_cuda_cmath.h
    clang/test/Headers/Inputs/include/cmath

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h
index 8ba182689a4f..f49463d72e04 100644

--- a/clang/lib/Headers/__clang_cuda_cmath.h
+++ b/clang/lib/Headers/__clang_cuda_cmath.h
@@ -66,10 +66,38 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
 }
 
 // For inscrutable reasons, the CUDA headers define these functions for us on
-// Windows. For OpenMP we omit these as some old system headers have
-// non-conforming `isinf(float)` and `isnan(float)` implementations that return
-// an `int`. The system versions of these functions should be fine anyway.
-#if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__)
+// Windows.
+#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)
+
+// For OpenMP we work around some old system headers that have non-conforming
+// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
+// this by providing two versions of these functions, 
diff ering only in the
+// return type. To avoid conflicting definitions we disable implicit base
+// function generation. That means we will end up with two specializations, one
+// per type, but only one has a base function defined by the system header.
+#if defined(__OPENMP_NVPTX__)
+#pragma omp begin declare variant match(                                       \
+    implementation = {extension(disable_implicit_base)})
+
+// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
+//        add a suffix. This means we would clash with the names of the variants
+//        (note that we do not create implicit base functions here). To avoid
+//        this clash we add a new trait to some of them that is always true
+//        (this is LLVM after all ;)). It will only influence the mangled name
+//        of the variants inside the inner region and avoid the clash.
+#pragma omp begin declare variant match(implementation = {vendor(llvm)})
+
+__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
+__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+
+#pragma omp end declare variant
+
+#endif
+
 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
@@ -79,6 +107,11 @@ __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+
+#if defined(__OPENMP_NVPTX__)
+#pragma omp end declare variant
+#endif
+
 #endif
 
 __DEVICE__ bool isgreater(float __x, float __y) {

diff  --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath
index 5e4e8b67514f..20e34898b553 100644
--- a/clang/test/Headers/Inputs/include/cmath
+++ b/clang/test/Headers/Inputs/include/cmath
@@ -82,8 +82,13 @@ bool isless(float, float);
 bool islessgreater(double, double);
 bool islessgreater(float, float);
 bool isnan(long double);
+#ifdef USE_ISNAN_WITH_INT_RETURN
+int isnan(double);
+int isnan(float);
+#else
 bool isnan(double);
 bool isnan(float);
+#endif
 bool isnormal(double);
 bool isnormal(float);
 bool isunordered(double, double);

diff  --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp
new file mode 100644
index 000000000000..35443dbdebea
--- /dev/null
+++ b/clang/test/Headers/openmp_device_math_isnan.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// expected-no-diagnostics
+
+#include <cmath>
+
+double math(float f, double d) {
+  double r = 0;
+  // INT_RETURN: call i32 @__nv_isnanf(float
+  // BOOL_RETURN: call i32 @__nv_isnanf(float
+  r += std::isnan(f);
+  // INT_RETURN: call i32 @__nv_isnand(double
+  // BOOL_RETURN: call i32 @__nv_isnand(double
+  r += std::isnan(d);
+  return r;
+}
+
+long double foo(float f, double d, long double ld) {
+  double r = ld;
+  r += math(f, d);
+#pragma omp target map(r)
+  { r += math(f, d); }
+  return r;
+}