[clang] d999cbc - [OpenMP] Initial support for std::complex in target regions
Johannes Doerfert via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 8 15:36:14 PDT 2020
Author: Johannes Doerfert
Date: 2020-07-08T17:33:59-05:00
New Revision: d999cbc98832154e15e786b98281211d5c1b9f5d
URL: https://github.com/llvm/llvm-project/commit/d999cbc98832154e15e786b98281211d5c1b9f5d
DIFF: https://github.com/llvm/llvm-project/commit/d999cbc98832154e15e786b98281211d5c1b9f5d.diff
LOG: [OpenMP] Initial support for std::complex in target regions
This simply follows the scheme we have for other wrappers. It resolves
the current link problem, e.g., `__muldc3 not found`, when std::complex
operations are used on a device.
This will not allow complex make math function calls to work properly,
e.g., sin, but that is more complex (pan intended) anyway.
Reviewed By: tra, JonChesterfield
Differential Revision: https://reviews.llvm.org/D80897
Added:
clang/lib/Headers/openmp_wrappers/complex
clang/lib/Headers/openmp_wrappers/complex.h
clang/test/Headers/Inputs/include/complex
clang/test/Headers/nvptx_device_math_complex.cpp
Modified:
clang/lib/Headers/CMakeLists.txt
clang/lib/Headers/__clang_cuda_complex_builtins.h
clang/lib/Headers/__clang_cuda_math.h
clang/test/Headers/Inputs/include/cmath
clang/test/Headers/Inputs/include/cstdlib
clang/test/Headers/nvptx_device_math_complex.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index e7bee192d918..0692fe75a441 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -151,6 +151,8 @@ set(ppc_wrapper_files
set(openmp_wrapper_files
openmp_wrappers/math.h
openmp_wrappers/cmath
+ openmp_wrappers/complex.h
+ openmp_wrappers/complex
openmp_wrappers/__clang_openmp_device_functions.h
openmp_wrappers/new
)
diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
index 576a958b16bb..d698be71d011 100644
--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -13,10 +13,61 @@
// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are
// libgcc functions that clang assumes are available when compiling c99 complex
// operations. (These implementations come from libc++, and have been modified
-// to work with CUDA.)
+// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
-extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
- double __c, double __d) {
+#pragma push_macro("__DEVICE__")
+#ifdef _OPENMP
+#pragma omp declare target
+#define __DEVICE__ __attribute__((noinline, nothrow, cold))
+#else
+#define __DEVICE__ __device__ inline
+#endif
+
+// Make the algorithms available for C and C++ by selecting the right functions.
+#if defined(__cplusplus)
+// TODO: In OpenMP mode we cannot overload isinf/isnan/isfinite the way we
+// overload all other math functions because old math system headers and not
+// always conformant and return an integer instead of a boolean. Until that has
+// been addressed we need to work around it. For now, we substituate with the
+// calls we would have used to implement those three functions. Note that we
+// could use the C alternatives as well.
+#define _ISNANd ::__isnan
+#define _ISNANf ::__isnanf
+#define _ISINFd ::__isinf
+#define _ISINFf ::__isinff
+#define _ISFINITEd ::__isfinited
+#define _ISFINITEf ::__finitef
+#define _COPYSIGNd std::copysign
+#define _COPYSIGNf std::copysign
+#define _SCALBNd std::scalbn
+#define _SCALBNf std::scalbn
+#define _ABSd std::abs
+#define _ABSf std::abs
+#define _LOGBd std::logb
+#define _LOGBf std::logb
+#else
+#define _ISNANd isnan
+#define _ISNANf isnanf
+#define _ISINFd isinf
+#define _ISINFf isinff
+#define _ISFINITEd isfinite
+#define _ISFINITEf isfinitef
+#define _COPYSIGNd copysign
+#define _COPYSIGNf copysignf
+#define _SCALBNd scalbn
+#define _SCALBNf scalbnf
+#define _ABSd abs
+#define _ABSf absf
+#define _LOGBd logb
+#define _LOGBf logbf
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
+ double __d) {
double __ac = __a * __c;
double __bd = __b * __d;
double __ad = __a * __d;
@@ -24,50 +75,49 @@ extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
double _Complex z;
__real__(z) = __ac - __bd;
__imag__(z) = __ad + __bc;
- if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
+ if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
int __recalc = 0;
- if (std::isinf(__a) || std::isinf(__b)) {
- __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
- __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
- if (std::isnan(__c))
- __c = std::copysign(0, __c);
- if (std::isnan(__d))
- __d = std::copysign(0, __d);
+ if (_ISINFd(__a) || _ISINFd(__b)) {
+ __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
+ __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
+ if (_ISNANd(__c))
+ __c = _COPYSIGNd(0, __c);
+ if (_ISNANd(__d))
+ __d = _COPYSIGNd(0, __d);
__recalc = 1;
}
- if (std::isinf(__c) || std::isinf(__d)) {
- __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
- __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
- if (std::isnan(__a))
- __a = std::copysign(0, __a);
- if (std::isnan(__b))
- __b = std::copysign(0, __b);
+ if (_ISINFd(__c) || _ISINFd(__d)) {
+ __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
+ __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
+ if (_ISNANd(__a))
+ __a = _COPYSIGNd(0, __a);
+ if (_ISNANd(__b))
+ __b = _COPYSIGNd(0, __b);
__recalc = 1;
}
- if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
- std::isinf(__ad) || std::isinf(__bc))) {
- if (std::isnan(__a))
- __a = std::copysign(0, __a);
- if (std::isnan(__b))
- __b = std::copysign(0, __b);
- if (std::isnan(__c))
- __c = std::copysign(0, __c);
- if (std::isnan(__d))
- __d = std::copysign(0, __d);
+ if (!__recalc &&
+ (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) {
+ if (_ISNANd(__a))
+ __a = _COPYSIGNd(0, __a);
+ if (_ISNANd(__b))
+ __b = _COPYSIGNd(0, __b);
+ if (_ISNANd(__c))
+ __c = _COPYSIGNd(0, __c);
+ if (_ISNANd(__d))
+ __d = _COPYSIGNd(0, __d);
__recalc = 1;
}
if (__recalc) {
// Can't use std::numeric_limits<double>::infinity() -- that doesn't have
// a device overload (and isn't constexpr before C++11, naturally).
- __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
- __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
+ __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
+ __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
}
}
return z;
}
-extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
- float __c, float __d) {
+__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
float __ac = __a * __c;
float __bd = __b * __d;
float __ad = __a * __d;
@@ -75,36 +125,36 @@ extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
float _Complex z;
__real__(z) = __ac - __bd;
__imag__(z) = __ad + __bc;
- if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
+ if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
int __recalc = 0;
- if (std::isinf(__a) || std::isinf(__b)) {
- __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
- __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
- if (std::isnan(__c))
- __c = std::copysign(0, __c);
- if (std::isnan(__d))
- __d = std::copysign(0, __d);
+ if (_ISINFf(__a) || _ISINFf(__b)) {
+ __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
+ __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
+ if (_ISNANf(__c))
+ __c = _COPYSIGNf(0, __c);
+ if (_ISNANf(__d))
+ __d = _COPYSIGNf(0, __d);
__recalc = 1;
}
- if (std::isinf(__c) || std::isinf(__d)) {
- __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
- __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
- if (std::isnan(__a))
- __a = std::copysign(0, __a);
- if (std::isnan(__b))
- __b = std::copysign(0, __b);
+ if (_ISINFf(__c) || _ISINFf(__d)) {
+ __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
+ __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
+ if (_ISNANf(__a))
+ __a = _COPYSIGNf(0, __a);
+ if (_ISNANf(__b))
+ __b = _COPYSIGNf(0, __b);
__recalc = 1;
}
- if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
- std::isinf(__ad) || std::isinf(__bc))) {
- if (std::isnan(__a))
- __a = std::copysign(0, __a);
- if (std::isnan(__b))
- __b = std::copysign(0, __b);
- if (std::isnan(__c))
- __c = std::copysign(0, __c);
- if (std::isnan(__d))
- __d = std::copysign(0, __d);
+ if (!__recalc &&
+ (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) {
+ if (_ISNANf(__a))
+ __a = _COPYSIGNf(0, __a);
+ if (_ISNANf(__b))
+ __b = _COPYSIGNf(0, __b);
+ if (_ISNANf(__c))
+ __c = _COPYSIGNf(0, __c);
+ if (_ISNANf(__d))
+ __d = _COPYSIGNf(0, __d);
__recalc = 1;
}
if (__recalc) {
@@ -115,36 +165,36 @@ extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
return z;
}
-extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
- double __c, double __d) {
+__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
+ double __d) {
int __ilogbw = 0;
// Can't use std::max, because that's defined in <algorithm>, and we don't
// want to pull that in for every compile. The CUDA headers define
// ::max(float, float) and ::max(double, double), which is sufficient for us.
- double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
- if (std::isfinite(__logbw)) {
+ double __logbw = _LOGBd(max(_ABSd(__c), _ABSd(__d)));
+ if (_ISFINITEd(__logbw)) {
__ilogbw = (int)__logbw;
- __c = std::scalbn(__c, -__ilogbw);
- __d = std::scalbn(__d, -__ilogbw);
+ __c = _SCALBNd(__c, -__ilogbw);
+ __d = _SCALBNd(__d, -__ilogbw);
}
double __denom = __c * __c + __d * __d;
double _Complex z;
- __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
- __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
- if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {
- __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
- __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
- } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
- std::isfinite(__d)) {
- __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
- __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
- __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
- __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
- } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&
- std::isfinite(__b)) {
- __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
- __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);
+ __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
+ __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
+ if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) {
+ __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
+ __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
+ } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) &&
+ _ISFINITEd(__d)) {
+ __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
+ __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
+ __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
+ __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
+ } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
+ _ISFINITEd(__b)) {
+ __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
+ __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
__real__(z) = 0.0 * (__a * __c + __b * __d);
__imag__(z) = 0.0 * (__b * __c - __a * __d);
}
@@ -152,33 +202,32 @@ extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
return z;
}
-extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
- float __c, float __d) {
+__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
int __ilogbw = 0;
- float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
- if (std::isfinite(__logbw)) {
+ float __logbw = _LOGBf(max(_ABSf(__c), _ABSf(__d)));
+ if (_ISFINITEf(__logbw)) {
__ilogbw = (int)__logbw;
- __c = std::scalbn(__c, -__ilogbw);
- __d = std::scalbn(__d, -__ilogbw);
+ __c = _SCALBNf(__c, -__ilogbw);
+ __d = _SCALBNf(__d, -__ilogbw);
}
float __denom = __c * __c + __d * __d;
float _Complex z;
- __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
- __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
- if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {
- __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
- __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
- } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
- std::isfinite(__d)) {
- __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
- __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
+ __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
+ __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
+ if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) {
+ __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
+ __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
+ } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) &&
+ _ISFINITEf(__d)) {
+ __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
+ __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
__real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
__imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
- } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&
- std::isfinite(__b)) {
- __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
- __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
+ } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
+ _ISFINITEf(__b)) {
+ __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
+ __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
__real__(z) = 0 * (__a * __c + __b * __d);
__imag__(z) = 0 * (__b * __c - __a * __d);
}
@@ -186,4 +235,29 @@ extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
return z;
}
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#undef _ISNANd
+#undef _ISNANf
+#undef _ISINFd
+#undef _ISINFf
+#undef _COPYSIGNd
+#undef _COPYSIGNf
+#undef _ISFINITEd
+#undef _ISFINITEf
+#undef _SCALBNd
+#undef _SCALBNf
+#undef _ABSd
+#undef _ABSf
+#undef _LOGBd
+#undef _LOGBf
+
+#ifdef _OPENMP
+#pragma omp end declare target
+#endif
+
+#pragma pop_macro("__DEVICE__")
+
#endif // __CLANG_CUDA_COMPLEX_BUILTINS
diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h
index 01db2f29af45..939c71a731e5 100644
--- a/clang/lib/Headers/__clang_cuda_math.h
+++ b/clang/lib/Headers/__clang_cuda_math.h
@@ -340,6 +340,16 @@ __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }
__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
+// In C++ mode OpenMP takes the system versions of these because some math
+// headers provide the wrong return type. This cannot happen in C and we can and
+// want to use the specialized versions right away.
+#if defined(_OPENMP) && !defined(__cplusplus)
+__DEVICE__ int isinff(float __x) { return __nv_isinff(__x); }
+__DEVICE__ int isinf(double __x) { return __nv_isinfd(__x); }
+__DEVICE__ int isnanf(float __x) { return __nv_isnanf(__x); }
+__DEVICE__ int isnan(double __x) { return __nv_isnand(__x); }
+#endif
+
#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__DEVICE_VOID__")
#pragma pop_macro("__FAST_OR_SLOW")
diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex
new file mode 100644
index 000000000000..1ed0b14879ef
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/complex
@@ -0,0 +1,25 @@
+/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_OPENMP_COMPLEX__
+#define __CLANG_OPENMP_COMPLEX__
+
+#ifndef _OPENMP
+#error "This file is for OpenMP compilation only."
+#endif
+
+// We require std::math functions in the complex builtins below.
+#include <cmath>
+
+#define __CUDA__
+#include <__clang_cuda_complex_builtins.h>
+#endif
+
+// Grab the host header too.
+#include_next <complex>
diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h
new file mode 100644
index 000000000000..829c7a785725
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/complex.h
@@ -0,0 +1,25 @@
+/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_OPENMP_COMPLEX_H__
+#define __CLANG_OPENMP_COMPLEX_H__
+
+#ifndef _OPENMP
+#error "This file is for OpenMP compilation only."
+#endif
+
+// We require math functions in the complex builtins below.
+#include <math.h>
+
+#define __CUDA__
+#include <__clang_cuda_complex_builtins.h>
+#endif
+
+// Grab the host header too.
+#include_next <complex.h>
diff --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath
index 0cadc131d211..5e4e8b67514f 100644
--- a/clang/test/Headers/Inputs/include/cmath
+++ b/clang/test/Headers/Inputs/include/cmath
@@ -49,8 +49,12 @@ double fma(double, double, double);
float fma(float, float, float);
double fmax(double, double);
float fmax(float, float);
+float max(float, float);
+double max(double, double);
double fmin(double, double);
float fmin(float, float);
+float min(float, float);
+double min(double, double);
double fmod(double, double);
float fmod(float, float);
int fpclassify(double);
diff --git a/clang/test/Headers/Inputs/include/complex b/clang/test/Headers/Inputs/include/complex
new file mode 100644
index 000000000000..f3aefab7954b
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/complex
@@ -0,0 +1,301 @@
+#pragma once
+
+#include <cmath>
+
+#define INFINITY (__builtin_inff())
+
+namespace std {
+
+// Taken from libc++
+template <class _Tp>
+class complex {
+public:
+ typedef _Tp value_type;
+
+private:
+ value_type __re_;
+ value_type __im_;
+
+public:
+ complex(const value_type &__re = value_type(), const value_type &__im = value_type())
+ : __re_(__re), __im_(__im) {}
+ template <class _Xp>
+ complex(const complex<_Xp> &__c)
+ : __re_(__c.real()), __im_(__c.imag()) {}
+
+ value_type real() const { return __re_; }
+ value_type imag() const { return __im_; }
+
+ void real(value_type __re) { __re_ = __re; }
+ void imag(value_type __im) { __im_ = __im; }
+
+ complex &operator=(const value_type &__re) {
+ __re_ = __re;
+ __im_ = value_type();
+ return *this;
+ }
+ complex &operator+=(const value_type &__re) {
+ __re_ += __re;
+ return *this;
+ }
+ complex &operator-=(const value_type &__re) {
+ __re_ -= __re;
+ return *this;
+ }
+ complex &operator*=(const value_type &__re) {
+ __re_ *= __re;
+ __im_ *= __re;
+ return *this;
+ }
+ complex &operator/=(const value_type &__re) {
+ __re_ /= __re;
+ __im_ /= __re;
+ return *this;
+ }
+
+ template <class _Xp>
+ complex &operator=(const complex<_Xp> &__c) {
+ __re_ = __c.real();
+ __im_ = __c.imag();
+ return *this;
+ }
+ template <class _Xp>
+ complex &operator+=(const complex<_Xp> &__c) {
+ __re_ += __c.real();
+ __im_ += __c.imag();
+ return *this;
+ }
+ template <class _Xp>
+ complex &operator-=(const complex<_Xp> &__c) {
+ __re_ -= __c.real();
+ __im_ -= __c.imag();
+ return *this;
+ }
+ template <class _Xp>
+ complex &operator*=(const complex<_Xp> &__c) {
+ *this = *this * complex(__c.real(), __c.imag());
+ return *this;
+ }
+ template <class _Xp>
+ complex &operator/=(const complex<_Xp> &__c) {
+ *this = *this / complex(__c.real(), __c.imag());
+ return *this;
+ }
+};
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+ complex<_Tp> __t(__x);
+ __t += __y;
+ return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x, const _Tp &__y) {
+ complex<_Tp> __t(__x);
+ __t += __y;
+ return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const _Tp &__x, const complex<_Tp> &__y) {
+ complex<_Tp> __t(__y);
+ __t += __x;
+ return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+ complex<_Tp> __t(__x);
+ __t -= __y;
+ return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x, const _Tp &__y) {
+ complex<_Tp> __t(__x);
+ __t -= __y;
+ return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const _Tp &__x, const complex<_Tp> &__y) {
+ complex<_Tp> __t(-__y);
+ __t += __x;
+ return __t;
+}
+
+template <class _Tp>
+complex<_Tp>
+operator*(const complex<_Tp> &__z, const complex<_Tp> &__w) {
+ _Tp __a = __z.real();
+ _Tp __b = __z.imag();
+ _Tp __c = __w.real();
+ _Tp __d = __w.imag();
+ _Tp __ac = __a * __c;
+ _Tp __bd = __b * __d;
+ _Tp __ad = __a * __d;
+ _Tp __bc = __b * __c;
+ _Tp __x = __ac - __bd;
+ _Tp __y = __ad + __bc;
+ if (std::isnan(__x) && std::isnan(__y)) {
+ bool __recalc = false;
+ if (std::isinf(__a) || std::isinf(__b)) {
+ __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a);
+ __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b);
+ if (std::isnan(__c))
+ __c = copysign(_Tp(0), __c);
+ if (std::isnan(__d))
+ __d = copysign(_Tp(0), __d);
+ __recalc = true;
+ }
+ if (std::isinf(__c) || std::isinf(__d)) {
+ __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c);
+ __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d);
+ if (std::isnan(__a))
+ __a = copysign(_Tp(0), __a);
+ if (std::isnan(__b))
+ __b = copysign(_Tp(0), __b);
+ __recalc = true;
+ }
+ if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
+ std::isinf(__ad) || std::isinf(__bc))) {
+ if (std::isnan(__a))
+ __a = copysign(_Tp(0), __a);
+ if (std::isnan(__b))
+ __b = copysign(_Tp(0), __b);
+ if (std::isnan(__c))
+ __c = copysign(_Tp(0), __c);
+ if (std::isnan(__d))
+ __d = copysign(_Tp(0), __d);
+ __recalc = true;
+ }
+ if (__recalc) {
+ __x = _Tp(INFINITY) * (__a * __c - __b * __d);
+ __y = _Tp(INFINITY) * (__a * __d + __b * __c);
+ }
+ }
+ return complex<_Tp>(__x, __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator*(const complex<_Tp> &__x, const _Tp &__y) {
+ complex<_Tp> __t(__x);
+ __t *= __y;
+ return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator*(const _Tp &__x, const complex<_Tp> &__y) {
+ complex<_Tp> __t(__y);
+ __t *= __x;
+ return __t;
+}
+
+template <class _Tp>
+complex<_Tp>
+operator/(const complex<_Tp> &__z, const complex<_Tp> &__w) {
+ int __ilogbw = 0;
+ _Tp __a = __z.real();
+ _Tp __b = __z.imag();
+ _Tp __c = __w.real();
+ _Tp __d = __w.imag();
+ _Tp __logbw = logb(fmax(fabs(__c), fabs(__d)));
+ if (std::isfinite(__logbw)) {
+ __ilogbw = static_cast<int>(__logbw);
+ __c = scalbn(__c, -__ilogbw);
+ __d = scalbn(__d, -__ilogbw);
+ }
+ _Tp __denom = __c * __c + __d * __d;
+ _Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+ _Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (std::isnan(__x) && std::isnan(__y)) {
+ if ((__denom == _Tp(0)) && (!std::isnan(__a) || !std::isnan(__b))) {
+ __x = copysign(_Tp(INFINITY), __c) * __a;
+ __y = copysign(_Tp(INFINITY), __c) * __b;
+ } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && std::isfinite(__d)) {
+ __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a);
+ __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b);
+ __x = _Tp(INFINITY) * (__a * __c + __b * __d);
+ __y = _Tp(INFINITY) * (__b * __c - __a * __d);
+ } else if (std::isinf(__logbw) && __logbw > _Tp(0) && std::isfinite(__a) && std::isfinite(__b)) {
+ __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c);
+ __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d);
+ __x = _Tp(0) * (__a * __c + __b * __d);
+ __y = _Tp(0) * (__b * __c - __a * __d);
+ }
+ }
+ return complex<_Tp>(__x, __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator/(const complex<_Tp> &__x, const _Tp &__y) {
+ return complex<_Tp>(__x.real() / __y, __x.imag() / __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator/(const _Tp &__x, const complex<_Tp> &__y) {
+ complex<_Tp> __t(__x);
+ __t /= __y;
+ return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x) {
+ return __x;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x) {
+ return complex<_Tp>(-__x.real(), -__x.imag());
+}
+
+template <class _Tp>
+inline bool
+operator==(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+ return __x.real() == __y.real() && __x.imag() == __y.imag();
+}
+
+template <class _Tp>
+inline bool
+operator==(const complex<_Tp> &__x, const _Tp &__y) {
+ return __x.real() == __y && __x.imag() == 0;
+}
+
+template <class _Tp>
+inline bool
+operator==(const _Tp &__x, const complex<_Tp> &__y) {
+ return __x == __y.real() && 0 == __y.imag();
+}
+
+template <class _Tp>
+inline bool
+operator!=(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+ return !(__x == __y);
+}
+
+template <class _Tp>
+inline bool
+operator!=(const complex<_Tp> &__x, const _Tp &__y) {
+ return !(__x == __y);
+}
+
+template <class _Tp>
+inline bool
+operator!=(const _Tp &__x, const complex<_Tp> &__y) {
+ return !(__x == __y);
+}
+
+} // namespace std
diff --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib
index 00e81e8c4a15..1d1864a98976 100644
--- a/clang/test/Headers/Inputs/include/cstdlib
+++ b/clang/test/Headers/Inputs/include/cstdlib
@@ -24,4 +24,8 @@ inline long long
abs(long long __x) { return __builtin_llabs (__x); }
float fabs(float __x) { return __builtin_fabs(__x); }
+
+float abs(float __x) { return fabs(__x); }
+double abs(double __x) { return fabs(__x); }
+
}
diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c
index 43f4ec6a6b59..9b96b5dd8c22 100644
--- a/clang/test/Headers/nvptx_device_math_complex.c
+++ b/clang/test/Headers/nvptx_device_math_complex.c
@@ -1,10 +1,22 @@
// REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
// expected-no-diagnostics
-// CHECK-DAG: call { float, float } @__divsc3(
-// CHECK-DAG: call { float, float } @__mulsc3(
+#ifdef __cplusplus
+#include <complex>
+#else
+#include <complex.h>
+#endif
+
+// CHECK-DAG: define {{.*}} @__mulsc3
+// CHECK-DAG: define {{.*}} @__muldc3
+// CHECK-DAG: define {{.*}} @__divsc3
+// CHECK-DAG: define {{.*}} @__divdc3
+
+// CHECK-DAG: call float @__nv_scalbnf(
void test_scmplx(float _Complex a) {
#pragma omp target
{
@@ -12,9 +24,7 @@ void test_scmplx(float _Complex a) {
}
}
-
-// CHECK-DAG: call { double, double } @__divdc3(
-// CHECK-DAG: call { double, double } @__muldc3(
+// CHECK-DAG: call double @__nv_scalbn(
void test_dcmplx(double _Complex a) {
#pragma omp target
{
diff --git a/clang/test/Headers/nvptx_device_math_complex.cpp b/clang/test/Headers/nvptx_device_math_complex.cpp
new file mode 100644
index 000000000000..15434d907605
--- /dev/null
+++ b/clang/test/Headers/nvptx_device_math_complex.cpp
@@ -0,0 +1,27 @@
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include <complex>
+
+// CHECK-DAG: define {{.*}} @__mulsc3
+// CHECK-DAG: define {{.*}} @__muldc3
+// CHECK-DAG: define {{.*}} @__divsc3
+// CHECK-DAG: define {{.*}} @__divdc3
+
+// CHECK-DAG: call float @__nv_scalbnf(
+void test_scmplx(std::complex<float> a) {
+#pragma omp target
+ {
+ (void)(a * (a / a));
+ }
+}
+
+// CHECK-DAG: call double @__nv_scalbn(
+void test_dcmplx(std::complex<double> a) {
+#pragma omp target
+ {
+ (void)(a * (a / a));
+ }
+}
More information about the cfe-commits
mailing list