[clang] 12da97e - [OpenMP][AMDGCN] Initial math headers support

Pushpinder Singh via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 30 07:52:52 PDT 2021


Author: Pushpinder Singh
Date: 2021-07-30T14:52:41Z
New Revision: 12da97ea10a941f0123340831300d09a2121e173

URL: https://github.com/llvm/llvm-project/commit/12da97ea10a941f0123340831300d09a2121e173
DIFF: https://github.com/llvm/llvm-project/commit/12da97ea10a941f0123340831300d09a2121e173.diff

LOG: [OpenMP][AMDGCN] Initial math headers support

With this patch, OpenMP on AMDGCN will use the math functions
provided by ROCm ocml library. Linking device code to the ocml will be
done in the next patch.

Reviewed By: JonChesterfield, jdoerfert, scchan

Differential Revision: https://reviews.llvm.org/D104904

Added: 
    clang/test/Headers/Inputs/include/algorithm
    clang/test/Headers/Inputs/include/utility
    clang/test/Headers/amdgcn_openmp_device_math.c

Modified: 
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Headers/__clang_hip_cmath.h
    clang/lib/Headers/__clang_hip_math.h
    clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
    clang/lib/Headers/openmp_wrappers/cmath
    clang/lib/Headers/openmp_wrappers/math.h
    clang/test/Headers/Inputs/include/cstdlib
    clang/test/Headers/openmp_device_math_isnan.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index e13302528cbd1..278ae118563d6 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1256,7 +1256,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
   // If we are offloading to a target via OpenMP we need to include the
   // openmp_wrappers folder which contains alternative system headers.
   if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
-      getToolChain().getTriple().isNVPTX()){
+      (getToolChain().getTriple().isNVPTX() ||
+       getToolChain().getTriple().isAMDGCN())) {
     if (!Args.hasArg(options::OPT_nobuiltininc)) {
       // Add openmp_wrappers/* to our system include path.  This lets us wrap
       // standard library headers.

diff  --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h
index 7342705434e6b..d488db0a94d9d 100644
--- a/clang/lib/Headers/__clang_hip_cmath.h
+++ b/clang/lib/Headers/__clang_hip_cmath.h
@@ -10,7 +10,7 @@
 #ifndef __CLANG_HIP_CMATH_H__
 #define __CLANG_HIP_CMATH_H__
 
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
@@ -25,31 +25,43 @@
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
+#pragma push_macro("__CONSTEXPR__")
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static __attribute__((always_inline, nothrow))
+#define __CONSTEXPR__ constexpr
+#else
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#define __CONSTEXPR__
+#endif // __OPENMP_AMDGCN__
 
 // Start with functions that cannot be defined by DEF macros below.
 #if defined(__cplusplus)
-__DEVICE__ double abs(double __x) { return ::fabs(__x); }
-__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
-__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
-__DEVICE__ long abs(long __n) { return ::labs(__n); }
-__DEVICE__ float fma(float __x, float __y, float __z) {
+#if defined __OPENMP_AMDGCN__
+__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); }
+__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); }
+#endif
+__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); }
+__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); }
+__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); }
+__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) {
   return ::fmaf(__x, __y, __z);
 }
 #if !defined(__HIPCC_RTC__)
 // The value returned by fpclassify is platform dependent, therefore it is not
 // supported by hipRTC.
-__DEVICE__ int fpclassify(float __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) {
   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, __x);
 }
-__DEVICE__ int fpclassify(double __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) {
   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, __x);
 }
 #endif // !defined(__HIPCC_RTC__)
 
-__DEVICE__ float frexp(float __arg, int *__exp) {
+__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) {
   return ::frexpf(__arg, __exp);
 }
 
@@ -71,93 +83,101 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
 //        of the variants inside the inner region and avoid the clash.
 #pragma omp begin declare variant match(implementation = {vendor(llvm)})
 
-__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ int isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); }
 
 #pragma omp end declare variant
 #endif // defined(__OPENMP_AMDGCN__)
 
-__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }
 
 #if defined(__OPENMP_AMDGCN__)
 #pragma omp end declare variant
 #endif // defined(__OPENMP_AMDGCN__)
 
-__DEVICE__ bool isgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) {
   return __builtin_isgreater(__x, __y);
 }
-__DEVICE__ bool isgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) {
   return __builtin_isgreater(__x, __y);
 }
-__DEVICE__ bool isgreaterequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) {
   return __builtin_isgreaterequal(__x, __y);
 }
-__DEVICE__ bool isgreaterequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) {
   return __builtin_isgreaterequal(__x, __y);
 }
-__DEVICE__ bool isless(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) {
   return __builtin_isless(__x, __y);
 }
-__DEVICE__ bool isless(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) {
   return __builtin_isless(__x, __y);
 }
-__DEVICE__ bool islessequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) {
   return __builtin_islessequal(__x, __y);
 }
-__DEVICE__ bool islessequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) {
   return __builtin_islessequal(__x, __y);
 }
-__DEVICE__ bool islessgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) {
   return __builtin_islessgreater(__x, __y);
 }
-__DEVICE__ bool islessgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) {
   return __builtin_islessgreater(__x, __y);
 }
-__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isunordered(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) {
+  return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) {
+  return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) {
   return __builtin_isunordered(__x, __y);
 }
-__DEVICE__ bool isunordered(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) {
   return __builtin_isunordered(__x, __y);
 }
-__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float pow(float __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) {
+  return ::modff(__x, __iptr);
+}
+__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) {
   return ::powif(__base, __iexp);
 }
-__DEVICE__ double pow(double __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) {
   return ::powi(__base, __iexp);
 }
-__DEVICE__ float remquo(float __x, float __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) {
   return ::remquof(__x, __y, __quo);
 }
-__DEVICE__ float scalbln(float __x, long int __n) {
+__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) {
   return ::scalblnf(__x, __n);
 }
-__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
-__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); }
 
 // Notably missing above is nexttoward.  We omit it because
 // ocml doesn't provide an implementation, and we don't want to be in the
 // business of implementing tricky libm functions in this header.
 
 // Other functions.
-__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
+__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y,
+                                      _Float16 __z) {
   return __ocml_fma_f16(__x, __y, __z);
 }
-__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) {
   return __ocml_pown_f16(__base, __iexp);
 }
 
+#ifndef __OPENMP_AMDGCN__
 // BEGIN DEF_FUN and HIP_OVERLOAD
 
 // BEGIN DEF_FUN
@@ -168,18 +188,19 @@ __DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
 
 // Define cmath functions with float argument and returns __retty.
 #define __DEF_FUN1(__retty, __func)                                            \
-  __DEVICE__                                                                   \
-  __retty __func(float __x) { return __func##f(__x); }
+  __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); }
 
 // Define cmath functions with two float arguments and returns __retty.
 #define __DEF_FUN2(__retty, __func)                                            \
-  __DEVICE__                                                                   \
-  __retty __func(float __x, float __y) { return __func##f(__x, __y); }
+  __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) {              \
+    return __func##f(__x, __y);                                                \
+  }
 
 // Define cmath functions with a float and an int argument and returns __retty.
 #define __DEF_FUN2_FI(__retty, __func)                                         \
-  __DEVICE__                                                                   \
-  __retty __func(float __x, int __y) { return __func##f(__x, __y); }
+  __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) {                \
+    return __func##f(__x, __y);                                                \
+  }
 
 __DEF_FUN1(float, acos)
 __DEF_FUN1(float, acosh)
@@ -426,7 +447,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
 // floor(double).
 #define __HIP_OVERLOAD1(__retty, __fn)                                         \
   template <typename __T>                                                      \
-  __DEVICE__                                                                   \
+  __DEVICE__ __CONSTEXPR__                                                     \
       typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type  \
       __fn(__T __x) {                                                          \
     return ::__fn((double)__x);                                                \
@@ -438,7 +459,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
 #if __cplusplus >= 201103L
 #define __HIP_OVERLOAD2(__retty, __fn)                                         \
   template <typename __T1, typename __T2>                                      \
-  __DEVICE__ typename __hip_enable_if<                                         \
+  __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<                           \
       __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value,  \
       typename __hip::__promote<__T1, __T2>::type>::type                       \
   __fn(__T1 __x, __T2 __y) {                                                   \
@@ -448,10 +469,11 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
 #else
 #define __HIP_OVERLOAD2(__retty, __fn)                                         \
   template <typename __T1, typename __T2>                                      \
-  __DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&     \
-                                          __hip::is_arithmetic<__T2>::value,   \
-                                      __retty>::type                           \
-  __fn(__T1 __x, __T2 __y) {                                                   \
+  __DEVICE__ __CONSTEXPR__                                                     \
+      typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&            \
+                                   __hip::is_arithmetic<__T2>::value,          \
+                               __retty>::type                                  \
+      __fn(__T1 __x, __T2 __y) {                                               \
     return __fn((double)__x, (double)__y);                                     \
   }
 #endif
@@ -526,7 +548,7 @@ __HIP_OVERLOAD2(double, min)
 // Additional Overloads that don't quite match HIP_OVERLOAD.
 #if __cplusplus >= 201103L
 template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<
+__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<
     __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
         __hip::is_arithmetic<__T3>::value,
     typename __hip::__promote<__T1, __T2, __T3>::type>::type
@@ -536,31 +558,32 @@ fma(__T1 __x, __T2 __y, __T3 __z) {
 }
 #else
 template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
-                                        __hip::is_arithmetic<__T2>::value &&
-                                        __hip::is_arithmetic<__T3>::value,
-                                    double>::type
-fma(__T1 __x, __T2 __y, __T3 __z) {
+__DEVICE__ __CONSTEXPR__
+    typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+                                 __hip::is_arithmetic<__T2>::value &&
+                                 __hip::is_arithmetic<__T3>::value,
+                             double>::type
+    fma(__T1 __x, __T2 __y, __T3 __z) {
   return ::fma((double)__x, (double)__y, (double)__z);
 }
 #endif
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     frexp(__T __x, int *__exp) {
   return ::frexp((double)__x, __exp);
 }
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     ldexp(__T __x, int __exp) {
   return ::ldexp((double)__x, __exp);
 }
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     modf(__T __x, double *__exp) {
   return ::modf((double)__x, __exp);
@@ -568,7 +591,7 @@ __DEVICE__
 
 #if __cplusplus >= 201103L
 template <typename __T1, typename __T2>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
                                  __hip::is_arithmetic<__T2>::value,
                              typename __hip::__promote<__T1, __T2>::type>::type
@@ -578,23 +601,24 @@ __DEVICE__
 }
 #else
 template <typename __T1, typename __T2>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
-                                        __hip::is_arithmetic<__T2>::value,
-                                    double>::type
-remquo(__T1 __x, __T2 __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__
+    typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+                                 __hip::is_arithmetic<__T2>::value,
+                             double>::type
+    remquo(__T1 __x, __T2 __y, int *__quo) {
   return ::remquo((double)__x, (double)__y, __quo);
 }
 #endif
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     scalbln(__T __x, long int __exp) {
   return ::scalbln((double)__x, __exp);
 }
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     scalbn(__T __x, int __exp) {
   return ::scalbn((double)__x, __exp);
@@ -607,8 +631,10 @@ __DEVICE__
 
 // END DEF_FUN and HIP_OVERLOAD
 
+#endif // ifndef __OPENMP_AMDGCN__
 #endif // defined(__cplusplus)
 
+#ifndef __OPENMP_AMDGCN__
 // Define these overloads inside the namespace our standard library uses.
 #if !defined(__HIPCC_RTC__)
 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
@@ -781,22 +807,26 @@ _GLIBCXX_END_NAMESPACE_VERSION
 #if defined(__cplusplus)
 extern "C" {
 #endif // defined(__cplusplus)
-__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x,
+                                                                    double y) {
   return cosh(x) * y;
 }
-__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x,
+                                                                    float y) {
   return coshf(x) * y;
 }
-__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) {
   return fpclassify(*p);
 }
-__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) {
   return fpclassify(*p);
 }
-__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x,
+                                                                    double y) {
   return sinh(x) * y;
 }
-__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x,
+                                                                    float y) {
   return sinhf(x) * y;
 }
 #if defined(__cplusplus)
@@ -804,7 +834,9 @@ __DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
 #endif // defined(__cplusplus)
 #endif // defined(_MSC_VER)
 #endif // !defined(__HIPCC_RTC__)
+#endif // ifndef __OPENMP_AMDGCN__
 
 #pragma pop_macro("__DEVICE__")
+#pragma pop_macro("__CONSTEXPR__")
 
 #endif // __CLANG_HIP_CMATH_H__

diff  --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 1f0982d92eff3..9effaa18d3e8c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -9,7 +9,7 @@
 #ifndef __CLANG_HIP_MATH_H__
 #define __CLANG_HIP_MATH_H__
 
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
@@ -19,18 +19,27 @@
 #endif
 #include <limits.h>
 #include <stdint.h>
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
+
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
+#else
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#endif
 
 // A few functions return bool type starting only in C++11.
 #pragma push_macro("__RETURN_TYPE")
+#ifdef __OPENMP_AMDGCN__
+#define __RETURN_TYPE int
+#else
 #if defined(__cplusplus)
 #define __RETURN_TYPE bool
 #else
 #define __RETURN_TYPE int
 #endif
+#endif // __OPENMP_AMDGCN__
 
 #if defined (__cplusplus) && __cplusplus < 201103L
 // emulate static_assert on type sizes
@@ -1262,7 +1271,7 @@ float min(float __x, float __y) { return fminf(__x, __y); }
 __DEVICE__
 double min(double __x, double __y) { return fmin(__x, __y); }
 
-#if !defined(__HIPCC_RTC__)
+#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
 __host__ inline static int min(int __arg1, int __arg2) {
   return std::min(__arg1, __arg2);
 }
@@ -1270,7 +1279,7 @@ __host__ inline static int min(int __arg1, int __arg2) {
 __host__ inline static int max(int __arg1, int __arg2) {
   return std::max(__arg1, __arg2);
 }
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
 #endif
 
 #pragma pop_macro("__DEVICE__")

diff  --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
index 953857badfc4c..99cf2483e7343 100644
--- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -14,13 +14,13 @@
 #error "This file is for OpenMP compilation only."
 #endif
 
-#pragma omp begin declare variant match(                                       \
-    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#pragma omp begin declare variant match(                                       \
+    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+
 #define __CUDA__
 #define __OPENMP_NVPTX__
 
@@ -33,12 +33,32 @@ extern "C" {
 #undef __OPENMP_NVPTX__
 #undef __CUDA__
 
-#ifdef __cplusplus
-} // extern "C"
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+// Import types which will be used by __clang_hip_libdevice_declares.h
+#ifndef __cplusplus
+#include <stdbool.h>
+#include <stdint.h>
 #endif
 
+#define __OPENMP_AMDGCN__
+#pragma push_macro("__device__")
+#define __device__
+
+/// Include declarations for libdevice functions.
+#include <__clang_hip_libdevice_declares.h>
+
+#pragma pop_macro("__device__")
+#undef __OPENMP_AMDGCN__
+
 #pragma omp end declare variant
 
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
 // Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
 // need to `include <new>` in C++ mode.
 #ifdef __cplusplus

diff  --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath
index 1aff66af7d52d..22a720aca9561 100644
--- a/clang/lib/Headers/openmp_wrappers/cmath
+++ b/clang/lib/Headers/openmp_wrappers/cmath
@@ -75,4 +75,58 @@ __DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
 
 #pragma omp end declare variant
 
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#pragma push_macro("__constant__")
+#define __constant__ __attribute__((constant))
+#define __OPENMP_AMDGCN__
+
+#include <__clang_hip_cmath.h>
+
+#pragma pop_macro("__constant__")
+#undef __OPENMP_AMDGCN__
+
+// Define overloads otherwise which are absent
+#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
+
+__DEVICE__ float acos(float __x) { return ::acosf(__x); }
+__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
+__DEVICE__ float asin(float __x) { return ::asinf(__x); }
+__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
+__DEVICE__ float atan(float __x) { return ::atanf(__x); }
+__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
+__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
+__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
+__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
+__DEVICE__ float erf(float __x) { return ::erff(__x); }
+__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
+__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
+__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
+__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
+__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
+__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
+__DEVICE__ float ldexp(float __arg, int __exp) {
+  return ::ldexpf(__arg, __exp);
+}
+__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
+__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
+__DEVICE__ float logb(float __x) { return ::logbf(__x); }
+__DEVICE__ float nextafter(float __x, float __y) {
+  return ::nextafterf(__x, __y);
+}
+__DEVICE__ float remainder(float __x, float __y) {
+  return ::remainderf(__x, __y);
+}
+__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
+__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
+__DEVICE__ float tan(float __x) { return ::tanf(__x); }
+__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
+__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
+
+#undef __DEVICE__
+
+#pragma omp end declare variant
+#endif // __AMDGCN__
+
 #endif

diff  --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h
index c64af8b13ece4..cd553defaa3bd 100644
--- a/clang/lib/Headers/openmp_wrappers/math.h
+++ b/clang/lib/Headers/openmp_wrappers/math.h
@@ -48,4 +48,12 @@
 
 #pragma omp end declare variant
 
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#define __OPENMP_AMDGCN__
+#include <__clang_hip_math.h>
+#undef __OPENMP_AMDGCN__
+
+#pragma omp end declare variant
+
 #endif

diff  --git a/clang/test/Headers/Inputs/include/algorithm b/clang/test/Headers/Inputs/include/algorithm
new file mode 100644
index 0000000000000..9122ec7179bfc
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/algorithm
@@ -0,0 +1,6 @@
+#pragma once
+
+namespace std {
+ template<class T> constexpr const T& min(const T& a, const T& b);
+ template<class T> constexpr const T& max(const T& a, const T& b);
+}
\ No newline at end of file

diff  --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib
index 689b5e06edec9..0b0adf4387309 100644
--- a/clang/test/Headers/Inputs/include/cstdlib
+++ b/clang/test/Headers/Inputs/include/cstdlib
@@ -27,3 +27,4 @@ float abs(float __x) { return fabs(__x); }
 double abs(double __x) { return fabs(__x); }
 
 }
+

diff  --git a/clang/test/Headers/Inputs/include/utility b/clang/test/Headers/Inputs/include/utility
new file mode 100644
index 0000000000000..3f59c932d39b0
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+

diff  --git a/clang/test/Headers/amdgcn_openmp_device_math.c b/clang/test/Headers/amdgcn_openmp_device_math.c
new file mode 100644
index 0000000000000..cab1e88156f67
--- /dev/null
+++ b/clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+    // CHECK: call double @__ocml_sin_f64
+    double l1 = sin(x);
+    // CHECK: call double @__ocml_cos_f64
+    double l2 = cos(x);
+    // CHECK: call double @__ocml_fabs_f64
+    double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+    // CHECK-C: call double @__ocml_sin_f64
+    // CHECK-CPP: call float @__ocml_sin_f32
+    float l1 = sin(x);
+    // CHECK-C: call double @__ocml_cos_f64
+    // CHECK-CPP: call float @__ocml_cos_f32
+    float l2 = cos(x);
+    // CHECK-C: call double @__ocml_fabs_f64
+    // CHECK-CPP: call float @__ocml_fabs_f32
+    float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+    // CHECK: call float @__ocml_sin_f32
+    float l1 = sinf(x);
+    // CHECK: call float @__ocml_cos_f32
+    float l2 = cosf(x);
+    // CHECK: call float @__ocml_fabs_f32
+    float l3 = fabsf(x);
+  }
+}

diff  --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp
index 7a75e4250c951..ddb3d75ff1157 100644
--- a/clang/test/Headers/openmp_device_math_isnan.cpp
+++ b/clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }


        


More information about the cfe-commits mailing list