[clang] 5dfdc18 - [OpenMP][AMDGCN] Apply fix for isnan, isinf and isfinite for amdgcn.
Jon Chesterfield via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 23 07:26:26 PDT 2021
Author: Ethan Stewart
Date: 2021-06-23T15:26:09+01:00
New Revision: 5dfdc1812d9b9c043204d39318f6446424d8f2d7
URL: https://github.com/llvm/llvm-project/commit/5dfdc1812d9b9c043204d39318f6446424d8f2d7
DIFF: https://github.com/llvm/llvm-project/commit/5dfdc1812d9b9c043204d39318f6446424d8f2d7.diff
LOG: [OpenMP][AMDGCN] Apply fix for isnan, isinf and isfinite for amdgcn.
This fixes issues with various return types(bool/int) and was already
in place for nvptx headers, adjusted to work for amdgcn. This does
not affect hip as the change is guarded with OPENMP_AMDGCN.
Similar to D85879.
Reviewed By: jdoerfert, JonChesterfield, yaxunl
Differential Revision: https://reviews.llvm.org/D104677
Added:
Modified:
clang/lib/Headers/__clang_hip_cmath.h
clang/test/Headers/hip-header.hip
clang/test/Headers/openmp_device_math_isnan.cpp
Removed:
################################################################################
diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h
index b5d7c16ac5e41..7342705434e6b 100644
--- a/clang/lib/Headers/__clang_hip_cmath.h
+++ b/clang/lib/Headers/__clang_hip_cmath.h
@@ -52,8 +52,46 @@ __DEVICE__ int fpclassify(double __x) {
__DEVICE__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
+
+#if defined(__OPENMP_AMDGCN__)
+// For OpenMP we work around some old system headers that have non-conforming
+// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
+// this by providing two versions of these functions,
diff ering only in the
+// return type. To avoid conflicting definitions we disable implicit base
+// function generation. That means we will end up with two specializations, one
+// per type, but only one has a base function defined by the system header.
+#pragma omp begin declare variant match( \
+ implementation = {extension(disable_implicit_base)})
+
+// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
+// add a suffix. This means we would clash with the names of the variants
+// (note that we do not create implicit base functions here). To avoid
+// this clash we add a new trait to some of them that is always true
+// (this is LLVM after all ;)). It will only influence the mangled name
+// of the variants inside the inner region and avoid the clash.
+#pragma omp begin declare variant match(implementation = {vendor(llvm)})
+
+__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ int isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+
+#pragma omp end declare variant
+#endif // defined(__OPENMP_AMDGCN__)
+
+__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+
+#if defined(__OPENMP_AMDGCN__)
+#pragma omp end declare variant
+#endif // defined(__OPENMP_AMDGCN__)
+
__DEVICE__ bool isgreater(float __x, float __y) {
return __builtin_isgreater(__x, __y);
}
@@ -66,8 +104,6 @@ __DEVICE__ bool isgreaterequal(float __x, float __y) {
__DEVICE__ bool isgreaterequal(double __x, double __y) {
return __builtin_isgreaterequal(__x, __y);
}
-__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isless(float __x, float __y) {
return __builtin_isless(__x, __y);
}
@@ -86,8 +122,6 @@ __DEVICE__ bool islessgreater(float __x, float __y) {
__DEVICE__ bool islessgreater(double __x, double __y) {
return __builtin_islessgreater(__x, __y);
}
-__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isunordered(float __x, float __y) {
diff --git a/clang/test/Headers/hip-header.hip b/clang/test/Headers/hip-header.hip
index 5bba1de2ce6c4..0e95d58d55700 100644
--- a/clang/test/Headers/hip-header.hip
+++ b/clang/test/Headers/hip-header.hip
@@ -8,6 +8,20 @@
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
+// RUN: -include cmath \
+// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN: -D__HIPCC_RTC__ | FileCheck %s -check-prefixes=AMD_BOOL_RETURN
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN: -internal-isystem %S/Inputs/include \
+// RUN: -include cmath \
+// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN: -D__HIPCC_RTC__ -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s -check-prefixes=AMD_INT_RETURN
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s
@@ -84,3 +98,20 @@ __device__ float test_floor() {
__device__ float test_max() {
return max(5, 6.0);
}
+
+// CHECK-LABEL: define{{.*}}@_Z10test_isnanv
+__device__ double test_isnan() {
+ double r = 0;
+ double d = 5.0;
+ float f = 5.0;
+
+ // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
+ // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
+ r += isnan(f);
+
+ // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
+ // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
+ r += isnan(d);
+
+ return r ;
+}
diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp
index 35443dbdebea6..7a75e4250c951 100644
--- a/clang/test/Headers/openmp_device_math_isnan.cpp
+++ b/clang/test/Headers/openmp_device_math_isnan.cpp
@@ -1,11 +1,19 @@
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=AMD_BOOL_RETURN
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=AMD_BOOL_RETURN
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=AMD_INT_RETURN
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=AMD_INT_RETURN
// expected-no-diagnostics
#include <cmath>
@@ -13,10 +21,14 @@
double math(float f, double d) {
double r = 0;
// INT_RETURN: call i32 @__nv_isnanf(float
+ // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
// BOOL_RETURN: call i32 @__nv_isnanf(float
+ // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
r += std::isnan(f);
// INT_RETURN: call i32 @__nv_isnand(double
+ // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
// BOOL_RETURN: call i32 @__nv_isnand(double
+ // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
r += std::isnan(d);
return r;
}
More information about the cfe-commits
mailing list