[clang] 968899a - [OpenMP][AMDGCN] Initial math headers support

Wed Jul 21 08:16:00 PDT 2021

Author: Pushpinder Singh
Date: 2021-07-21T16:15:39+01:00
New Revision: 968899ad9cf17579f9867dafb35c4d97bad0863f

URL: https://github.com/llvm/llvm-project/commit/968899ad9cf17579f9867dafb35c4d97bad0863f
DIFF: https://github.com/llvm/llvm-project/commit/968899ad9cf17579f9867dafb35c4d97bad0863f.diff

LOG: [OpenMP][AMDGCN] Initial math headers support

With this patch, OpenMP on AMDGCN will use the math functions
provided by ROCm ocml library. Linking device code to the ocml will be
done in the next patch.

Reviewed By: JonChesterfield, jdoerfert, scchan

Differential Revision: https://reviews.llvm.org/D104904

Added: 
    clang/test/Headers/Inputs/include/algorithm
    clang/test/Headers/Inputs/include/utility
    clang/test/Headers/amdgcn_openmp_device_math.c

Modified: 
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Headers/__clang_hip_cmath.h
    clang/lib/Headers/__clang_hip_math.h
    clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
    clang/lib/Headers/openmp_wrappers/cmath
    clang/lib/Headers/openmp_wrappers/math.h
    clang/test/Headers/Inputs/include/cstdlib
    clang/test/Headers/openmp_device_math_isnan.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 837ead86d6202..cf8e209ba1af1 100644

--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1255,7 +1255,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
   // If we are offloading to a target via OpenMP we need to include the
   // openmp_wrappers folder which contains alternative system headers.
   if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
-      getToolChain().getTriple().isNVPTX()){
+      (getToolChain().getTriple().isNVPTX() ||
+       getToolChain().getTriple().isAMDGCN())) {
     if (!Args.hasArg(options::OPT_nobuiltininc)) {
       // Add openmp_wrappers/* to our system include path.  This lets us wrap
       // standard library headers.

diff  --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h
index 7342705434e6b..6f7cbde38dd20 100644
--- a/clang/lib/Headers/__clang_hip_cmath.h
+++ b/clang/lib/Headers/__clang_hip_cmath.h
@@ -10,7 +10,7 @@
 #ifndef __CLANG_HIP_CMATH_H__
 #define __CLANG_HIP_CMATH_H__
 
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
@@ -25,31 +25,38 @@
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
+#pragma push_macro("__CONSTEXPR__")
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static __attribute__((always_inline, nothrow))
+#define __CONSTEXPR__ constexpr
+#else
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#define __CONSTEXPR__
+#endif // __OPENMP_AMDGCN__
 
 // Start with functions that cannot be defined by DEF macros below.
 #if defined(__cplusplus)
-__DEVICE__ double abs(double __x) { return ::fabs(__x); }
-__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
-__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
-__DEVICE__ long abs(long __n) { return ::labs(__n); }
-__DEVICE__ float fma(float __x, float __y, float __z) {
+__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); }
+__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); }
+__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); }
+__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) {
   return ::fmaf(__x, __y, __z);
 }
 #if !defined(__HIPCC_RTC__)
 // The value returned by fpclassify is platform dependent, therefore it is not
 // supported by hipRTC.
-__DEVICE__ int fpclassify(float __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) {
   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, __x);
 }
-__DEVICE__ int fpclassify(double __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) {
   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, __x);
 }
 #endif // !defined(__HIPCC_RTC__)
 
-__DEVICE__ float frexp(float __arg, int *__exp) {
+__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) {
   return ::frexpf(__arg, __exp);
 }
 
@@ -71,90 +78,97 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
 //        of the variants inside the inner region and avoid the clash.
 #pragma omp begin declare variant match(implementation = {vendor(llvm)})
 
-__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ int isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); }
 
 #pragma omp end declare variant
 #endif // defined(__OPENMP_AMDGCN__)
 
-__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }
 
 #if defined(__OPENMP_AMDGCN__)
 #pragma omp end declare variant
 #endif // defined(__OPENMP_AMDGCN__)
 
-__DEVICE__ bool isgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) {
   return __builtin_isgreater(__x, __y);
 }
-__DEVICE__ bool isgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) {
   return __builtin_isgreater(__x, __y);
 }
-__DEVICE__ bool isgreaterequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) {
   return __builtin_isgreaterequal(__x, __y);
 }
-__DEVICE__ bool isgreaterequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) {
   return __builtin_isgreaterequal(__x, __y);
 }
-__DEVICE__ bool isless(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) {
   return __builtin_isless(__x, __y);
 }
-__DEVICE__ bool isless(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) {
   return __builtin_isless(__x, __y);
 }
-__DEVICE__ bool islessequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) {
   return __builtin_islessequal(__x, __y);
 }
-__DEVICE__ bool islessequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) {
   return __builtin_islessequal(__x, __y);
 }
-__DEVICE__ bool islessgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) {
   return __builtin_islessgreater(__x, __y);
 }
-__DEVICE__ bool islessgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) {
   return __builtin_islessgreater(__x, __y);
 }
-__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isunordered(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) {
+  return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) {
+  return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) {
   return __builtin_isunordered(__x, __y);
 }
-__DEVICE__ bool isunordered(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) {
   return __builtin_isunordered(__x, __y);
 }
-__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float pow(float __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) {
+  return ::modff(__x, __iptr);
+}
+__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) {
   return ::powif(__base, __iexp);
 }
-__DEVICE__ double pow(double __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) {
   return ::powi(__base, __iexp);
 }
-__DEVICE__ float remquo(float __x, float __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) {
   return ::remquof(__x, __y, __quo);
 }
-__DEVICE__ float scalbln(float __x, long int __n) {
+__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) {
   return ::scalblnf(__x, __n);
 }
-__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
-__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); }
 
 // Notably missing above is nexttoward.  We omit it because
 // ocml doesn't provide an implementation, and we don't want to be in the
 // business of implementing tricky libm functions in this header.
 
 // Other functions.
-__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
+__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y,
+                                      _Float16 __z) {
   return __ocml_fma_f16(__x, __y, __z);
 }
-__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) {
   return __ocml_pown_f16(__base, __iexp);
 }
 
@@ -168,18 +182,19 @@ __DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
 
 // Define cmath functions with float argument and returns __retty.
 #define __DEF_FUN1(__retty, __func)                                            \
-  __DEVICE__                                                                   \
-  __retty __func(float __x) { return __func##f(__x); }
+  __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); }
 
 // Define cmath functions with two float arguments and returns __retty.
 #define __DEF_FUN2(__retty, __func)                                            \
-  __DEVICE__                                                                   \
-  __retty __func(float __x, float __y) { return __func##f(__x, __y); }
+  __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) {              \
+    return __func##f(__x, __y);                                                \
+  }
 
 // Define cmath functions with a float and an int argument and returns __retty.
 #define __DEF_FUN2_FI(__retty, __func)                                         \
-  __DEVICE__                                                                   \
-  __retty __func(float __x, int __y) { return __func##f(__x, __y); }
+  __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) {                \
+    return __func##f(__x, __y);                                                \
+  }
 
 __DEF_FUN1(float, acos)
 __DEF_FUN1(float, acosh)
@@ -426,7 +441,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
 // floor(double).
 #define __HIP_OVERLOAD1(__retty, __fn)                                         \
   template <typename __T>                                                      \
-  __DEVICE__                                                                   \
+  __DEVICE__ __CONSTEXPR__                                                     \
       typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type  \
       __fn(__T __x) {                                                          \
     return ::__fn((double)__x);                                                \
@@ -438,7 +453,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
 #if __cplusplus >= 201103L
 #define __HIP_OVERLOAD2(__retty, __fn)                                         \
   template <typename __T1, typename __T2>                                      \
-  __DEVICE__ typename __hip_enable_if<                                         \
+  __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<                           \
       __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value,  \
       typename __hip::__promote<__T1, __T2>::type>::type                       \
   __fn(__T1 __x, __T2 __y) {                                                   \
@@ -448,10 +463,11 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
 #else
 #define __HIP_OVERLOAD2(__retty, __fn)                                         \
   template <typename __T1, typename __T2>                                      \
-  __DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&     \
-                                          __hip::is_arithmetic<__T2>::value,   \
-                                      __retty>::type                           \
-  __fn(__T1 __x, __T2 __y) {                                                   \
+  __DEVICE__ __CONSTEXPR__                                                     \
+      typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&            \
+                                   __hip::is_arithmetic<__T2>::value,          \
+                               __retty>::type                                  \
+      __fn(__T1 __x, __T2 __y) {                                               \
     return __fn((double)__x, (double)__y);                                     \
   }
 #endif
@@ -526,7 +542,7 @@ __HIP_OVERLOAD2(double, min)
 // Additional Overloads that don't quite match HIP_OVERLOAD.
 #if __cplusplus >= 201103L
 template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<
+__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<
     __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
         __hip::is_arithmetic<__T3>::value,
     typename __hip::__promote<__T1, __T2, __T3>::type>::type
@@ -536,31 +552,32 @@ fma(__T1 __x, __T2 __y, __T3 __z) {
 }
 #else
 template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
-                                        __hip::is_arithmetic<__T2>::value &&
-                                        __hip::is_arithmetic<__T3>::value,
-                                    double>::type
-fma(__T1 __x, __T2 __y, __T3 __z) {
+__DEVICE__ __CONSTEXPR__
+    typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+                                 __hip::is_arithmetic<__T2>::value &&
+                                 __hip::is_arithmetic<__T3>::value,
+                             double>::type
+    fma(__T1 __x, __T2 __y, __T3 __z) {
   return ::fma((double)__x, (double)__y, (double)__z);
 }
 #endif
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     frexp(__T __x, int *__exp) {
   return ::frexp((double)__x, __exp);
 }
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     ldexp(__T __x, int __exp) {
   return ::ldexp((double)__x, __exp);
 }
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     modf(__T __x, double *__exp) {
   return ::modf((double)__x, __exp);
@@ -568,7 +585,7 @@ __DEVICE__
 
 #if __cplusplus >= 201103L
 template <typename __T1, typename __T2>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
                                  __hip::is_arithmetic<__T2>::value,
                              typename __hip::__promote<__T1, __T2>::type>::type
@@ -578,23 +595,24 @@ __DEVICE__
 }
 #else
 template <typename __T1, typename __T2>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
-                                        __hip::is_arithmetic<__T2>::value,
-                                    double>::type
-remquo(__T1 __x, __T2 __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__
+    typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+                                 __hip::is_arithmetic<__T2>::value,
+                             double>::type
+    remquo(__T1 __x, __T2 __y, int *__quo) {
   return ::remquo((double)__x, (double)__y, __quo);
 }
 #endif
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     scalbln(__T __x, long int __exp) {
   return ::scalbln((double)__x, __exp);
 }
 
 template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
     typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
     scalbn(__T __x, int __exp) {
   return ::scalbn((double)__x, __exp);
@@ -781,22 +799,26 @@ _GLIBCXX_END_NAMESPACE_VERSION
 #if defined(__cplusplus)
 extern "C" {
 #endif // defined(__cplusplus)
-__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x,
+                                                                    double y) {
   return cosh(x) * y;
 }
-__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x,
+                                                                    float y) {
   return coshf(x) * y;
 }
-__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) {
   return fpclassify(*p);
 }
-__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) {
   return fpclassify(*p);
 }
-__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x,
+                                                                    double y) {
   return sinh(x) * y;
 }
-__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x,
+                                                                    float y) {
   return sinhf(x) * y;
 }
 #if defined(__cplusplus)
@@ -806,5 +828,6 @@ __DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma pop_macro("__DEVICE__")
+#pragma pop_macro("__CONSTEXPR__")
 
 #endif // __CLANG_HIP_CMATH_H__

diff  --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 1f0982d92eff3..9effaa18d3e8c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -9,7 +9,7 @@
 #ifndef __CLANG_HIP_MATH_H__
 #define __CLANG_HIP_MATH_H__
 
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
@@ -19,18 +19,27 @@
 #endif
 #include <limits.h>
 #include <stdint.h>
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
+
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
+#else
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#endif
 
 // A few functions return bool type starting only in C++11.
 #pragma push_macro("__RETURN_TYPE")
+#ifdef __OPENMP_AMDGCN__
+#define __RETURN_TYPE int
+#else
 #if defined(__cplusplus)
 #define __RETURN_TYPE bool
 #else
 #define __RETURN_TYPE int
 #endif
+#endif // __OPENMP_AMDGCN__
 
 #if defined (__cplusplus) && __cplusplus < 201103L
 // emulate static_assert on type sizes
@@ -1262,7 +1271,7 @@ float min(float __x, float __y) { return fminf(__x, __y); }
 __DEVICE__
 double min(double __x, double __y) { return fmin(__x, __y); }
 
-#if !defined(__HIPCC_RTC__)
+#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
 __host__ inline static int min(int __arg1, int __arg2) {
   return std::min(__arg1, __arg2);
 }
@@ -1270,7 +1279,7 @@ __host__ inline static int min(int __arg1, int __arg2) {
 __host__ inline static int max(int __arg1, int __arg2) {
   return std::max(__arg1, __arg2);
 }
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
 #endif
 
 #pragma pop_macro("__DEVICE__")

diff  --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
index 953857badfc4c..99cf2483e7343 100644
--- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -14,13 +14,13 @@
 #error "This file is for OpenMP compilation only."
 #endif
 
-#pragma omp begin declare variant match(                                       \
-    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#pragma omp begin declare variant match(                                       \
+    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+
 #define __CUDA__
 #define __OPENMP_NVPTX__
 
@@ -33,12 +33,32 @@ extern "C" {
 #undef __OPENMP_NVPTX__
 #undef __CUDA__
 
-#ifdef __cplusplus
-} // extern "C"
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+// Import types which will be used by __clang_hip_libdevice_declares.h
+#ifndef __cplusplus
+#include <stdbool.h>
+#include <stdint.h>
 #endif
 
+#define __OPENMP_AMDGCN__
+#pragma push_macro("__device__")
+#define __device__
+
+/// Include declarations for libdevice functions.
+#include <__clang_hip_libdevice_declares.h>
+
+#pragma pop_macro("__device__")
+#undef __OPENMP_AMDGCN__
+
 #pragma omp end declare variant
 
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
 // Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
 // need to `include <new>` in C++ mode.
 #ifdef __cplusplus

diff  --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath
index 1aff66af7d52d..4fb3d2dff75b3 100644
--- a/clang/lib/Headers/openmp_wrappers/cmath
+++ b/clang/lib/Headers/openmp_wrappers/cmath
@@ -75,4 +75,19 @@ __DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
 
 #pragma omp end declare variant
 
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#pragma push_macro("__constant__")
+#define __constant__ __attribute__((constant))
+#define __OPENMP_AMDGCN__
+
+#include <__clang_hip_cmath.h>
+
+#pragma pop_macro("__constant__")
+#undef __OPENMP_AMDGCN__
+
+#pragma omp end declare variant
+#endif // __AMDGCN__
+
 #endif

diff  --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h
index c64af8b13ece4..cd553defaa3bd 100644
--- a/clang/lib/Headers/openmp_wrappers/math.h
+++ b/clang/lib/Headers/openmp_wrappers/math.h
@@ -48,4 +48,12 @@
 
 #pragma omp end declare variant
 
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#define __OPENMP_AMDGCN__
+#include <__clang_hip_math.h>
+#undef __OPENMP_AMDGCN__
+
+#pragma omp end declare variant
+
 #endif

diff  --git a/clang/test/Headers/Inputs/include/algorithm b/clang/test/Headers/Inputs/include/algorithm
new file mode 100644
index 0000000000000..9122ec7179bfc
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/algorithm
@@ -0,0 +1,6 @@
+#pragma once
+
+namespace std {
+ template<class T> constexpr const T& min(const T& a, const T& b);
+ template<class T> constexpr const T& max(const T& a, const T& b);
+}
\ No newline at end of file

diff  --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib
index 689b5e06edec9..6be37ff5866a7 100644
--- a/clang/test/Headers/Inputs/include/cstdlib
+++ b/clang/test/Headers/Inputs/include/cstdlib
@@ -21,9 +21,13 @@ abs(long __i) { return __builtin_labs(__i); }
 inline long long
 abs(long long __x) { return __builtin_llabs (__x); }
 
+// amdgcn already provides definition of fabs
+#ifndef __AMDGCN__
 float fabs(float __x) { return __builtin_fabs(__x); }
+#endif
 
 float abs(float __x) { return fabs(__x); }
 double abs(double __x) { return fabs(__x); }
 
 }
+

diff  --git a/clang/test/Headers/Inputs/include/utility b/clang/test/Headers/Inputs/include/utility
new file mode 100644
index 0000000000000..3f59c932d39b0
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/utility
@@ -0,0 +1,2 @@
+#pragma once
+

diff  --git a/clang/test/Headers/amdgcn_openmp_device_math.c b/clang/test/Headers/amdgcn_openmp_device_math.c
new file mode 100644
index 0000000000000..cab1e88156f67
--- /dev/null
+++ b/clang/test/Headers/amdgcn_openmp_device_math.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+void test_math_f64(double x) {
+// CHECK-LABEL: define {{.*}}test_math_f64
+#pragma omp target
+  {
+    // CHECK: call double @__ocml_sin_f64
+    double l1 = sin(x);
+    // CHECK: call double @__ocml_cos_f64
+    double l2 = cos(x);
+    // CHECK: call double @__ocml_fabs_f64
+    double l3 = fabs(x);
+  }
+}
+
+void test_math_f32(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32
+#pragma omp target
+  {
+    // CHECK-C: call double @__ocml_sin_f64
+    // CHECK-CPP: call float @__ocml_sin_f32
+    float l1 = sin(x);
+    // CHECK-C: call double @__ocml_cos_f64
+    // CHECK-CPP: call float @__ocml_cos_f32
+    float l2 = cos(x);
+    // CHECK-C: call double @__ocml_fabs_f64
+    // CHECK-CPP: call float @__ocml_fabs_f32
+    float l3 = fabs(x);
+  }
+}
+void test_math_f32_suffix(float x) {
+// CHECK-LABEL: define {{.*}}test_math_f32_suffix
+#pragma omp target
+  {
+    // CHECK: call float @__ocml_sin_f32
+    float l1 = sinf(x);
+    // CHECK: call float @__ocml_cos_f32
+    float l2 = cosf(x);
+    // CHECK: call float @__ocml_fabs_f32
+    float l3 = fabsf(x);
+  }
+}

diff  --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp
index 7a75e4250c951..ddb3d75ff1157 100644
--- a/clang/test/Headers/openmp_device_math_isnan.cpp
+++ b/clang/test/Headers/openmp_device_math_isnan.cpp
@@ -21,14 +21,14 @@
 double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call i32 @__nv_isnanf(float
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
   // BOOL_RETURN: call i32 @__nv_isnanf(float
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
   r += std::isnan(f);
   // INT_RETURN: call i32 @__nv_isnand(double
-  // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
+  // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
   // BOOL_RETURN: call i32 @__nv_isnand(double
-  // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
+  // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
   r += std::isnan(d);
   return r;
 }