r279140 - [CUDA] Improve handling of math functions.
Justin Lebar via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 18 13:43:13 PDT 2016
Author: jlebar
Date: Thu Aug 18 15:43:13 2016
New Revision: 279140
URL: http://llvm.org/viewvc/llvm-project?rev=279140&view=rev
Log:
[CUDA] Improve handling of math functions.
Summary:
A bunch of related changes here to our CUDA math headers.
- The second arg to nexttoward is a double (well, technically, long
double, but we don't have that), not a float.
- Add a forward-declare of llround(float), which is defined in the CUDA
headers. We need this for the same reason we need most of the other
forward-declares: To prevent a constexpr function in our standard
library from becoming host+device.
- Add nexttowardf implementation.
- Pull "foobarf" functions defined by the CUDA headers in the global
namespace into namespace std. This lets you do e.g. std::sinf.
- Add overloads for math functions accepting integer types. This lets
you do e.g. std::sin(0) without having an ambiguity between the
overload that takes a float and the one that takes a double.
With these changes, we pass testcases derived from libc++ for cmath and
math.h. We can check these testcases in to the test-suite once support
for CUDA lands there.
Reviewers: tra
Subscribers: cfe-commits
Differential Revision: https://reviews.llvm.org/D23627
Modified:
cfe/trunk/lib/Headers/__clang_cuda_cmath.h
cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h
Modified: cfe/trunk/lib/Headers/__clang_cuda_cmath.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__clang_cuda_cmath.h?rev=279140&r1=279139&r2=279140&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/__clang_cuda_cmath.h (original)
+++ cfe/trunk/lib/Headers/__clang_cuda_cmath.h Thu Aug 18 15:43:13 2016
@@ -26,13 +26,15 @@
#error "This file is for CUDA compilation only."
#endif
+#include <limits>
+
// CUDA lets us use various std math functions on the device side. This file
// works in concert with __clang_cuda_math_forward_declares.h to make this work.
//
// Specifically, the forward-declares header declares __device__ overloads for
// these functions in the global namespace, then pulls them into namespace std
// with 'using' statements. Then this file implements those functions, after
-// the implementations have been pulled in.
+// their implementations have been pulled in.
//
// It's important that we declare the functions in the global namespace and pull
// them into namespace std with using statements, as opposed to simply declaring
@@ -120,12 +122,15 @@ __DEVICE__ float ldexp(float __arg, int
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float nexttoward(float __from, float __to) {
+__DEVICE__ float nexttoward(float __from, double __to) {
return __builtin_nexttowardf(__from, __to);
}
__DEVICE__ double nexttoward(double __from, double __to) {
return __builtin_nexttoward(__from, __to);
}
+__DEVICE__ float nexttowardf(float __from, double __to) {
+ return __builtin_nexttowardf(__from, __to);
+}
__DEVICE__ float pow(float __base, float __exp) {
return ::powf(__base, __exp);
}
@@ -143,6 +148,280 @@ __DEVICE__ float sqrt(float __x) { retur
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
+// Now we've defined everything we promised we'd define in
+// __clang_cuda_math_forward_declares.h. We need to do two additional things to
+// fix up our math functions.
+//
+// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
+// only sin(float) and sin(double), which means that e.g. sin(0) is
+// ambiguous.
+//
+// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
+// std. These are defined in the CUDA headers in the global namespace,
+// independent of everything else we've done here.
+
+// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
+// we go ahead and unconditionally define functions that are only available when
+// compiling for C++11 to match the behavior of the CUDA headers.
+template<bool __B, class __T = void>
+struct __clang_cuda_enable_if {};
+
+template <class __T> struct __clang_cuda_enable_if<true, __T> {
+ typedef __T type;
+};
+
+// Defines an overload of __fn that accepts one integral argument, calls
+// __fn((double)x), and returns __retty.
+#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \
+ template <typename __T> \
+ __DEVICE__ \
+ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \
+ __retty>::type \
+ __fn(__T __x) { \
+ return ::__fn((double)__x); \
+ }
+
+// Defines an overload of __fn that accepts one two arithmetic arguments, calls
+// __fn((double)x, (double)y), and returns a double.
+//
+// Note this is different from OVERLOAD_1, which generates an overload that
+// accepts only *integral* arguments.
+#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \
+ template <typename __T1, typename __T2> \
+ __DEVICE__ typename __clang_cuda_enable_if< \
+ std::numeric_limits<__T1>::is_specialized && \
+ std::numeric_limits<__T2>::is_specialized, \
+ __retty>::type \
+ __fn(__T1 __x, __T2 __y) { \
+ return __fn((double)__x, (double)__y); \
+ }
+
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
+
+#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
+#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
+
+// Overloads for functions that don't match the patterns expected by
+// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
+template <typename __T1, typename __T2, typename __T3>
+__DEVICE__ typename __clang_cuda_enable_if<
+ std::numeric_limits<__T1>::is_specialized &&
+ std::numeric_limits<__T2>::is_specialized &&
+ std::numeric_limits<__T3>::is_specialized,
+ double>::type
+fma(__T1 __x, __T2 __y, __T3 __z) {
+ return std::fma((double)__x, (double)__y, (double)__z);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+ double>::type
+frexp(__T __x, int *__exp) {
+ return std::frexp((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+ double>::type
+ldexp(__T __x, int __exp) {
+ return std::ldexp((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+ double>::type
+nexttoward(__T __from, double __to) {
+ return std::nexttoward((double)__from, __to);
+}
+
+template <typename __T1, typename __T2>
+__DEVICE__ typename __clang_cuda_enable_if<
+ std::numeric_limits<__T1>::is_specialized &&
+ std::numeric_limits<__T2>::is_specialized,
+ double>::type
+remquo(__T1 __x, __T2 __y, int *__quo) {
+ return std::remquo((double)__x, (double)__y, __quo);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+ double>::type
+scalbln(__T __x, long __exp) {
+ return std::scalbln((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+ double>::type
+scalbn(__T __x, int __exp) {
+ return std::scalbn((double)__x, __exp);
+}
+
+namespace std {
+// Pull the new overloads we defined above into namespace std.
+using ::acos;
+using ::acosh;
+using ::asin;
+using ::asinh;
+using ::atan;
+using ::atan2;
+using ::atanh;
+using ::cbrt;
+using ::ceil;
+using ::cos;
+using ::cosh;
+using ::erf;
+using ::erfc;
+using ::exp;
+using ::exp2;
+using ::expm1;
+using ::fabs;
+using ::floor;
+using ::frexp;
+using ::ilogb;
+using ::ldexp;
+using ::lgamma;
+using ::llrint;
+using ::llround;
+using ::log;
+using ::log10;
+using ::log1p;
+using ::log2;
+using ::logb;
+using ::lrint;
+using ::lround;
+using ::nexttoward;
+using ::pow;
+using ::remquo;
+using ::scalbln;
+using ::scalbn;
+using ::sin;
+using ::sinh;
+using ::sqrt;
+using ::tan;
+using ::tanh;
+using ::tgamma;
+
+// Finally, pull the "foobarf" functions that CUDA defines in its headers into
+// namespace std.
+using ::acosf;
+using ::acoshf;
+using ::asinf;
+using ::asinhf;
+using ::atan2f;
+using ::atanf;
+using ::atanhf;
+using ::cbrtf;
+using ::ceilf;
+using ::copysignf;
+using ::cosf;
+using ::coshf;
+using ::erfcf;
+using ::erff;
+using ::exp2f;
+using ::expf;
+using ::expm1f;
+using ::fabsf;
+using ::fdimf;
+using ::floorf;
+using ::fmaf;
+using ::fmaxf;
+using ::fminf;
+using ::fmodf;
+using ::frexpf;
+using ::hypotf;
+using ::ilogbf;
+using ::ldexpf;
+using ::lgammaf;
+using ::llrintf;
+using ::llroundf;
+using ::log10f;
+using ::log1pf;
+using ::log2f;
+using ::logbf;
+using ::logf;
+using ::lrintf;
+using ::lroundf;
+using ::modff;
+using ::nearbyintf;
+using ::nextafterf;
+using ::nexttowardf;
+using ::nexttowardf;
+using ::powf;
+using ::remainderf;
+using ::remquof;
+using ::rintf;
+using ::roundf;
+using ::scalblnf;
+using ::scalbnf;
+using ::sinf;
+using ::sinhf;
+using ::sqrtf;
+using ::tanf;
+using ::tanhf;
+using ::tgammaf;
+using ::truncf;
+}
+
#undef __DEVICE__
#endif
Modified: cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h?rev=279140&r1=279139&r2=279140&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h (original)
+++ cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h Thu Aug 18 15:43:13 2016
@@ -140,6 +140,7 @@ __DEVICE__ long lrint(double);
__DEVICE__ long lrint(float);
__DEVICE__ long lround(double);
__DEVICE__ long lround(float);
+__DEVICE__ long long llround(float); // No llround(double).
__DEVICE__ double modf(double, double *);
__DEVICE__ float modf(float, float *);
__DEVICE__ double nan(const char *);
@@ -149,7 +150,8 @@ __DEVICE__ float nearbyint(float);
__DEVICE__ double nextafter(double, double);
__DEVICE__ float nextafter(float, float);
__DEVICE__ double nexttoward(double, double);
-__DEVICE__ float nexttoward(float, float);
+__DEVICE__ float nexttoward(float, double);
+__DEVICE__ float nexttowardf(float, double);
__DEVICE__ double pow(double, double);
__DEVICE__ double pow(double, int);
__DEVICE__ float pow(float, float);
@@ -235,6 +237,7 @@ using ::log2;
using ::logb;
using ::lrint;
using ::lround;
+using ::llround;
using ::modf;
using ::nan;
using ::nanf;
More information about the cfe-commits
mailing list