[clang] d999cbc - [OpenMP] Initial support for std::complex in target regions

Wed Jul 8 15:36:14 PDT 2020

Author: Johannes Doerfert
Date: 2020-07-08T17:33:59-05:00
New Revision: d999cbc98832154e15e786b98281211d5c1b9f5d

URL: https://github.com/llvm/llvm-project/commit/d999cbc98832154e15e786b98281211d5c1b9f5d
DIFF: https://github.com/llvm/llvm-project/commit/d999cbc98832154e15e786b98281211d5c1b9f5d.diff

LOG: [OpenMP] Initial support for std::complex in target regions

This simply follows the scheme we have for other wrappers. It resolves
the current link problem, e.g., `__muldc3 not found`, when std::complex
operations are used on a device.

This will not allow complex make math function calls to work properly,
e.g., sin, but that is more complex (pan intended) anyway.

Reviewed By: tra, JonChesterfield

Differential Revision: https://reviews.llvm.org/D80897

Added: 
    clang/lib/Headers/openmp_wrappers/complex
    clang/lib/Headers/openmp_wrappers/complex.h
    clang/test/Headers/Inputs/include/complex
    clang/test/Headers/nvptx_device_math_complex.cpp

Modified: 
    clang/lib/Headers/CMakeLists.txt
    clang/lib/Headers/__clang_cuda_complex_builtins.h
    clang/lib/Headers/__clang_cuda_math.h
    clang/test/Headers/Inputs/include/cmath
    clang/test/Headers/Inputs/include/cstdlib
    clang/test/Headers/nvptx_device_math_complex.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index e7bee192d918..0692fe75a441 100644

--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -151,6 +151,8 @@ set(ppc_wrapper_files
 set(openmp_wrapper_files
   openmp_wrappers/math.h
   openmp_wrappers/cmath
+  openmp_wrappers/complex.h
+  openmp_wrappers/complex
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/new
 )

diff  --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
index 576a958b16bb..d698be71d011 100644
--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -13,10 +13,61 @@
 // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3.  These are
 // libgcc functions that clang assumes are available when compiling c99 complex
 // operations.  (These implementations come from libc++, and have been modified
-// to work with CUDA.)
+// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
 
-extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
-                                                      double __c, double __d) {
+#pragma push_macro("__DEVICE__")
+#ifdef _OPENMP
+#pragma omp declare target
+#define __DEVICE__ __attribute__((noinline, nothrow, cold))
+#else
+#define __DEVICE__ __device__ inline
+#endif
+
+// Make the algorithms available for C and C++ by selecting the right functions.
+#if defined(__cplusplus)
+// TODO: In OpenMP mode we cannot overload isinf/isnan/isfinite the way we
+// overload all other math functions because old math system headers and not
+// always conformant and return an integer instead of a boolean. Until that has
+// been addressed we need to work around it. For now, we substituate with the
+// calls we would have used to implement those three functions. Note that we
+// could use the C alternatives as well.
+#define _ISNANd ::__isnan
+#define _ISNANf ::__isnanf
+#define _ISINFd ::__isinf
+#define _ISINFf ::__isinff
+#define _ISFINITEd ::__isfinited
+#define _ISFINITEf ::__finitef
+#define _COPYSIGNd std::copysign
+#define _COPYSIGNf std::copysign
+#define _SCALBNd std::scalbn
+#define _SCALBNf std::scalbn
+#define _ABSd std::abs
+#define _ABSf std::abs
+#define _LOGBd std::logb
+#define _LOGBf std::logb
+#else
+#define _ISNANd isnan
+#define _ISNANf isnanf
+#define _ISINFd isinf
+#define _ISINFf isinff
+#define _ISFINITEd isfinite
+#define _ISFINITEf isfinitef
+#define _COPYSIGNd copysign
+#define _COPYSIGNf copysignf
+#define _SCALBNd scalbn
+#define _SCALBNf scalbnf
+#define _ABSd abs
+#define _ABSf absf
+#define _LOGBd logb
+#define _LOGBf logbf
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
+                                    double __d) {
   double __ac = __a * __c;
   double __bd = __b * __d;
   double __ad = __a * __d;
@@ -24,50 +75,49 @@ extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
   double _Complex z;
   __real__(z) = __ac - __bd;
   __imag__(z) = __ad + __bc;
-  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
+  if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
     int __recalc = 0;
-    if (std::isinf(__a) || std::isinf(__b)) {
-      __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
-      __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
-      if (std::isnan(__c))
-        __c = std::copysign(0, __c);
-      if (std::isnan(__d))
-        __d = std::copysign(0, __d);
+    if (_ISINFd(__a) || _ISINFd(__b)) {
+      __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
+      __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
+      if (_ISNANd(__c))
+        __c = _COPYSIGNd(0, __c);
+      if (_ISNANd(__d))
+        __d = _COPYSIGNd(0, __d);
       __recalc = 1;
     }
-    if (std::isinf(__c) || std::isinf(__d)) {
-      __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
-      __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
-      if (std::isnan(__a))
-        __a = std::copysign(0, __a);
-      if (std::isnan(__b))
-        __b = std::copysign(0, __b);
+    if (_ISINFd(__c) || _ISINFd(__d)) {
+      __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
+      __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
+      if (_ISNANd(__a))
+        __a = _COPYSIGNd(0, __a);
+      if (_ISNANd(__b))
+        __b = _COPYSIGNd(0, __b);
       __recalc = 1;
     }
-    if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
-                      std::isinf(__ad) || std::isinf(__bc))) {
-      if (std::isnan(__a))
-        __a = std::copysign(0, __a);
-      if (std::isnan(__b))
-        __b = std::copysign(0, __b);
-      if (std::isnan(__c))
-        __c = std::copysign(0, __c);
-      if (std::isnan(__d))
-        __d = std::copysign(0, __d);
+    if (!__recalc &&
+        (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) {
+      if (_ISNANd(__a))
+        __a = _COPYSIGNd(0, __a);
+      if (_ISNANd(__b))
+        __b = _COPYSIGNd(0, __b);
+      if (_ISNANd(__c))
+        __c = _COPYSIGNd(0, __c);
+      if (_ISNANd(__d))
+        __d = _COPYSIGNd(0, __d);
       __recalc = 1;
     }
     if (__recalc) {
       // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
       // a device overload (and isn't constexpr before C++11, naturally).
-      __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
-      __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
+      __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
+      __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
     }
   }
   return z;
 }
 
-extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
-                                                     float __c, float __d) {
+__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
   float __ac = __a * __c;
   float __bd = __b * __d;
   float __ad = __a * __d;
@@ -75,36 +125,36 @@ extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
   float _Complex z;
   __real__(z) = __ac - __bd;
   __imag__(z) = __ad + __bc;
-  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
+  if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
     int __recalc = 0;
-    if (std::isinf(__a) || std::isinf(__b)) {
-      __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
-      __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
-      if (std::isnan(__c))
-        __c = std::copysign(0, __c);
-      if (std::isnan(__d))
-        __d = std::copysign(0, __d);
+    if (_ISINFf(__a) || _ISINFf(__b)) {
+      __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
+      __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
+      if (_ISNANf(__c))
+        __c = _COPYSIGNf(0, __c);
+      if (_ISNANf(__d))
+        __d = _COPYSIGNf(0, __d);
       __recalc = 1;
     }
-    if (std::isinf(__c) || std::isinf(__d)) {
-      __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
-      __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
-      if (std::isnan(__a))
-        __a = std::copysign(0, __a);
-      if (std::isnan(__b))
-        __b = std::copysign(0, __b);
+    if (_ISINFf(__c) || _ISINFf(__d)) {
+      __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
+      __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
+      if (_ISNANf(__a))
+        __a = _COPYSIGNf(0, __a);
+      if (_ISNANf(__b))
+        __b = _COPYSIGNf(0, __b);
       __recalc = 1;
     }
-    if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
-                      std::isinf(__ad) || std::isinf(__bc))) {
-      if (std::isnan(__a))
-        __a = std::copysign(0, __a);
-      if (std::isnan(__b))
-        __b = std::copysign(0, __b);
-      if (std::isnan(__c))
-        __c = std::copysign(0, __c);
-      if (std::isnan(__d))
-        __d = std::copysign(0, __d);
+    if (!__recalc &&
+        (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) {
+      if (_ISNANf(__a))
+        __a = _COPYSIGNf(0, __a);
+      if (_ISNANf(__b))
+        __b = _COPYSIGNf(0, __b);
+      if (_ISNANf(__c))
+        __c = _COPYSIGNf(0, __c);
+      if (_ISNANf(__d))
+        __d = _COPYSIGNf(0, __d);
       __recalc = 1;
     }
     if (__recalc) {
@@ -115,36 +165,36 @@ extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
   return z;
 }
 
-extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
-                                                      double __c, double __d) {
+__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
+                                    double __d) {
   int __ilogbw = 0;
   // Can't use std::max, because that's defined in <algorithm>, and we don't
   // want to pull that in for every compile.  The CUDA headers define
   // ::max(float, float) and ::max(double, double), which is sufficient for us.
-  double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
-  if (std::isfinite(__logbw)) {
+  double __logbw = _LOGBd(max(_ABSd(__c), _ABSd(__d)));
+  if (_ISFINITEd(__logbw)) {
     __ilogbw = (int)__logbw;
-    __c = std::scalbn(__c, -__ilogbw);
-    __d = std::scalbn(__d, -__ilogbw);
+    __c = _SCALBNd(__c, -__ilogbw);
+    __d = _SCALBNd(__d, -__ilogbw);
   }
   double __denom = __c * __c + __d * __d;
   double _Complex z;
-  __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
-  __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
-  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
-    if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {
-      __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
-      __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
-    } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
-               std::isfinite(__d)) {
-      __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
-      __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
-      __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
-      __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
-    } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&
-               std::isfinite(__b)) {
-      __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
-      __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);
+  __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
+  __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
+    if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) {
+      __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
+      __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
+    } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) &&
+               _ISFINITEd(__d)) {
+      __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
+      __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
+      __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
+      __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
+    } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
+               _ISFINITEd(__b)) {
+      __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
+      __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
       __real__(z) = 0.0 * (__a * __c + __b * __d);
       __imag__(z) = 0.0 * (__b * __c - __a * __d);
     }
@@ -152,33 +202,32 @@ extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
   return z;
 }
 
-extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
-                                                     float __c, float __d) {
+__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
   int __ilogbw = 0;
-  float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
-  if (std::isfinite(__logbw)) {
+  float __logbw = _LOGBf(max(_ABSf(__c), _ABSf(__d)));
+  if (_ISFINITEf(__logbw)) {
     __ilogbw = (int)__logbw;
-    __c = std::scalbn(__c, -__ilogbw);
-    __d = std::scalbn(__d, -__ilogbw);
+    __c = _SCALBNf(__c, -__ilogbw);
+    __d = _SCALBNf(__d, -__ilogbw);
   }
   float __denom = __c * __c + __d * __d;
   float _Complex z;
-  __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
-  __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
-  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
-    if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {
-      __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
-      __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
-    } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
-               std::isfinite(__d)) {
-      __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
-      __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
+  __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
+  __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
+    if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) {
+      __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
+      __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
+    } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) &&
+               _ISFINITEf(__d)) {
+      __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
+      __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
       __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
       __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
-    } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&
-               std::isfinite(__b)) {
-      __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
-      __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
+    } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
+               _ISFINITEf(__b)) {
+      __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
+      __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
       __real__(z) = 0 * (__a * __c + __b * __d);
       __imag__(z) = 0 * (__b * __c - __a * __d);
     }
@@ -186,4 +235,29 @@ extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
   return z;
 }
 
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#undef _ISNANd
+#undef _ISNANf
+#undef _ISINFd
+#undef _ISINFf
+#undef _COPYSIGNd
+#undef _COPYSIGNf
+#undef _ISFINITEd
+#undef _ISFINITEf
+#undef _SCALBNd
+#undef _SCALBNf
+#undef _ABSd
+#undef _ABSf
+#undef _LOGBd
+#undef _LOGBf
+
+#ifdef _OPENMP
+#pragma omp end declare target
+#endif
+
+#pragma pop_macro("__DEVICE__")
+
 #endif // __CLANG_CUDA_COMPLEX_BUILTINS

diff  --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h
index 01db2f29af45..939c71a731e5 100644
--- a/clang/lib/Headers/__clang_cuda_math.h
+++ b/clang/lib/Headers/__clang_cuda_math.h
@@ -340,6 +340,16 @@ __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }
 __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
 __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 
+// In C++ mode OpenMP takes the system versions of these because some math
+// headers provide the wrong return type. This cannot happen in C and we can and
+// want to use the specialized versions right away.
+#if defined(_OPENMP) && !defined(__cplusplus)
+__DEVICE__ int isinff(float __x) { return __nv_isinff(__x); }
+__DEVICE__ int isinf(double __x) { return __nv_isinfd(__x); }
+__DEVICE__ int isnanf(float __x) { return __nv_isnanf(__x); }
+__DEVICE__ int isnan(double __x) { return __nv_isnand(__x); }
+#endif
+
 #pragma pop_macro("__DEVICE__")
 #pragma pop_macro("__DEVICE_VOID__")
 #pragma pop_macro("__FAST_OR_SLOW")

diff  --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex
new file mode 100644
index 000000000000..1ed0b14879ef
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/complex
@@ -0,0 +1,25 @@
+/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_OPENMP_COMPLEX__
+#define __CLANG_OPENMP_COMPLEX__
+
+#ifndef _OPENMP
+#error "This file is for OpenMP compilation only."
+#endif
+
+// We require std::math functions in the complex builtins below.
+#include <cmath>
+
+#define __CUDA__
+#include <__clang_cuda_complex_builtins.h>
+#endif
+
+// Grab the host header too.
+#include_next <complex>

diff  --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h
new file mode 100644
index 000000000000..829c7a785725
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/complex.h
@@ -0,0 +1,25 @@
+/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_OPENMP_COMPLEX_H__
+#define __CLANG_OPENMP_COMPLEX_H__
+
+#ifndef _OPENMP
+#error "This file is for OpenMP compilation only."
+#endif
+
+// We require math functions in the complex builtins below.
+#include <math.h>
+
+#define __CUDA__
+#include <__clang_cuda_complex_builtins.h>
+#endif
+
+// Grab the host header too.
+#include_next <complex.h>

diff  --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath
index 0cadc131d211..5e4e8b67514f 100644
--- a/clang/test/Headers/Inputs/include/cmath
+++ b/clang/test/Headers/Inputs/include/cmath
@@ -49,8 +49,12 @@ double fma(double, double, double);
 float fma(float, float, float);
 double fmax(double, double);
 float fmax(float, float);
+float max(float, float);
+double max(double, double);
 double fmin(double, double);
 float fmin(float, float);
+float min(float, float);
+double min(double, double);
 double fmod(double, double);
 float fmod(float, float);
 int fpclassify(double);

diff  --git a/clang/test/Headers/Inputs/include/complex b/clang/test/Headers/Inputs/include/complex
new file mode 100644
index 000000000000..f3aefab7954b
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/complex
@@ -0,0 +1,301 @@
+#pragma once
+
+#include <cmath>
+
+#define INFINITY (__builtin_inff())
+
+namespace std {
+
+// Taken from libc++
+template <class _Tp>
+class complex {
+public:
+  typedef _Tp value_type;
+
+private:
+  value_type __re_;
+  value_type __im_;
+
+public:
+  complex(const value_type &__re = value_type(), const value_type &__im = value_type())
+      : __re_(__re), __im_(__im) {}
+  template <class _Xp>
+  complex(const complex<_Xp> &__c)
+      : __re_(__c.real()), __im_(__c.imag()) {}
+
+  value_type real() const { return __re_; }
+  value_type imag() const { return __im_; }
+
+  void real(value_type __re) { __re_ = __re; }
+  void imag(value_type __im) { __im_ = __im; }
+
+  complex &operator=(const value_type &__re) {
+    __re_ = __re;
+    __im_ = value_type();
+    return *this;
+  }
+  complex &operator+=(const value_type &__re) {
+    __re_ += __re;
+    return *this;
+  }
+  complex &operator-=(const value_type &__re) {
+    __re_ -= __re;
+    return *this;
+  }
+  complex &operator*=(const value_type &__re) {
+    __re_ *= __re;
+    __im_ *= __re;
+    return *this;
+  }
+  complex &operator/=(const value_type &__re) {
+    __re_ /= __re;
+    __im_ /= __re;
+    return *this;
+  }
+
+  template <class _Xp>
+  complex &operator=(const complex<_Xp> &__c) {
+    __re_ = __c.real();
+    __im_ = __c.imag();
+    return *this;
+  }
+  template <class _Xp>
+  complex &operator+=(const complex<_Xp> &__c) {
+    __re_ += __c.real();
+    __im_ += __c.imag();
+    return *this;
+  }
+  template <class _Xp>
+  complex &operator-=(const complex<_Xp> &__c) {
+    __re_ -= __c.real();
+    __im_ -= __c.imag();
+    return *this;
+  }
+  template <class _Xp>
+  complex &operator*=(const complex<_Xp> &__c) {
+    *this = *this * complex(__c.real(), __c.imag());
+    return *this;
+  }
+  template <class _Xp>
+  complex &operator/=(const complex<_Xp> &__c) {
+    *this = *this / complex(__c.real(), __c.imag());
+    return *this;
+  }
+};
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__x);
+  __t += __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x, const _Tp &__y) {
+  complex<_Tp> __t(__x);
+  __t += __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const _Tp &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__y);
+  __t += __x;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__x);
+  __t -= __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x, const _Tp &__y) {
+  complex<_Tp> __t(__x);
+  __t -= __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const _Tp &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(-__y);
+  __t += __x;
+  return __t;
+}
+
+template <class _Tp>
+complex<_Tp>
+operator*(const complex<_Tp> &__z, const complex<_Tp> &__w) {
+  _Tp __a = __z.real();
+  _Tp __b = __z.imag();
+  _Tp __c = __w.real();
+  _Tp __d = __w.imag();
+  _Tp __ac = __a * __c;
+  _Tp __bd = __b * __d;
+  _Tp __ad = __a * __d;
+  _Tp __bc = __b * __c;
+  _Tp __x = __ac - __bd;
+  _Tp __y = __ad + __bc;
+  if (std::isnan(__x) && std::isnan(__y)) {
+    bool __recalc = false;
+    if (std::isinf(__a) || std::isinf(__b)) {
+      __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a);
+      __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b);
+      if (std::isnan(__c))
+        __c = copysign(_Tp(0), __c);
+      if (std::isnan(__d))
+        __d = copysign(_Tp(0), __d);
+      __recalc = true;
+    }
+    if (std::isinf(__c) || std::isinf(__d)) {
+      __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c);
+      __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d);
+      if (std::isnan(__a))
+        __a = copysign(_Tp(0), __a);
+      if (std::isnan(__b))
+        __b = copysign(_Tp(0), __b);
+      __recalc = true;
+    }
+    if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
+                      std::isinf(__ad) || std::isinf(__bc))) {
+      if (std::isnan(__a))
+        __a = copysign(_Tp(0), __a);
+      if (std::isnan(__b))
+        __b = copysign(_Tp(0), __b);
+      if (std::isnan(__c))
+        __c = copysign(_Tp(0), __c);
+      if (std::isnan(__d))
+        __d = copysign(_Tp(0), __d);
+      __recalc = true;
+    }
+    if (__recalc) {
+      __x = _Tp(INFINITY) * (__a * __c - __b * __d);
+      __y = _Tp(INFINITY) * (__a * __d + __b * __c);
+    }
+  }
+  return complex<_Tp>(__x, __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator*(const complex<_Tp> &__x, const _Tp &__y) {
+  complex<_Tp> __t(__x);
+  __t *= __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator*(const _Tp &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__y);
+  __t *= __x;
+  return __t;
+}
+
+template <class _Tp>
+complex<_Tp>
+operator/(const complex<_Tp> &__z, const complex<_Tp> &__w) {
+  int __ilogbw = 0;
+  _Tp __a = __z.real();
+  _Tp __b = __z.imag();
+  _Tp __c = __w.real();
+  _Tp __d = __w.imag();
+  _Tp __logbw = logb(fmax(fabs(__c), fabs(__d)));
+  if (std::isfinite(__logbw)) {
+    __ilogbw = static_cast<int>(__logbw);
+    __c = scalbn(__c, -__ilogbw);
+    __d = scalbn(__d, -__ilogbw);
+  }
+  _Tp __denom = __c * __c + __d * __d;
+  _Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+  _Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (std::isnan(__x) && std::isnan(__y)) {
+    if ((__denom == _Tp(0)) && (!std::isnan(__a) || !std::isnan(__b))) {
+      __x = copysign(_Tp(INFINITY), __c) * __a;
+      __y = copysign(_Tp(INFINITY), __c) * __b;
+    } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && std::isfinite(__d)) {
+      __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a);
+      __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b);
+      __x = _Tp(INFINITY) * (__a * __c + __b * __d);
+      __y = _Tp(INFINITY) * (__b * __c - __a * __d);
+    } else if (std::isinf(__logbw) && __logbw > _Tp(0) && std::isfinite(__a) && std::isfinite(__b)) {
+      __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c);
+      __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d);
+      __x = _Tp(0) * (__a * __c + __b * __d);
+      __y = _Tp(0) * (__b * __c - __a * __d);
+    }
+  }
+  return complex<_Tp>(__x, __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator/(const complex<_Tp> &__x, const _Tp &__y) {
+  return complex<_Tp>(__x.real() / __y, __x.imag() / __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator/(const _Tp &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__x);
+  __t /= __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x) {
+  return __x;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x) {
+  return complex<_Tp>(-__x.real(), -__x.imag());
+}
+
+template <class _Tp>
+inline bool
+operator==(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+  return __x.real() == __y.real() && __x.imag() == __y.imag();
+}
+
+template <class _Tp>
+inline bool
+operator==(const complex<_Tp> &__x, const _Tp &__y) {
+  return __x.real() == __y && __x.imag() == 0;
+}
+
+template <class _Tp>
+inline bool
+operator==(const _Tp &__x, const complex<_Tp> &__y) {
+  return __x == __y.real() && 0 == __y.imag();
+}
+
+template <class _Tp>
+inline bool
+operator!=(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+  return !(__x == __y);
+}
+
+template <class _Tp>
+inline bool
+operator!=(const complex<_Tp> &__x, const _Tp &__y) {
+  return !(__x == __y);
+}
+
+template <class _Tp>
+inline bool
+operator!=(const _Tp &__x, const complex<_Tp> &__y) {
+  return !(__x == __y);
+}
+
+} // namespace std

diff  --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib
index 00e81e8c4a15..1d1864a98976 100644
--- a/clang/test/Headers/Inputs/include/cstdlib
+++ b/clang/test/Headers/Inputs/include/cstdlib
@@ -24,4 +24,8 @@ inline long long
 abs(long long __x) { return __builtin_llabs (__x); }
 
 float fabs(float __x) { return __builtin_fabs(__x); }
+
+float abs(float __x) { return fabs(__x); }
+double abs(double __x) { return fabs(__x); }
+
 }

diff  --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c
index 43f4ec6a6b59..9b96b5dd8c22 100644
--- a/clang/test/Headers/nvptx_device_math_complex.c
+++ b/clang/test/Headers/nvptx_device_math_complex.c
@@ -1,10 +1,22 @@
 // REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
 // expected-no-diagnostics
 
-// CHECK-DAG: call { float, float } @__divsc3(
-// CHECK-DAG: call { float, float } @__mulsc3(
+#ifdef __cplusplus
+#include <complex>
+#else
+#include <complex.h>
+#endif
+
+// CHECK-DAG: define {{.*}} @__mulsc3
+// CHECK-DAG: define {{.*}} @__muldc3
+// CHECK-DAG: define {{.*}} @__divsc3
+// CHECK-DAG: define {{.*}} @__divdc3
+
+// CHECK-DAG: call float @__nv_scalbnf(
 void test_scmplx(float _Complex a) {
 #pragma omp target
   {
@@ -12,9 +24,7 @@ void test_scmplx(float _Complex a) {
   }
 }
 
-
-// CHECK-DAG: call { double, double } @__divdc3(
-// CHECK-DAG: call { double, double } @__muldc3(
+// CHECK-DAG: call double @__nv_scalbn(
 void test_dcmplx(double _Complex a) {
 #pragma omp target
   {

diff  --git a/clang/test/Headers/nvptx_device_math_complex.cpp b/clang/test/Headers/nvptx_device_math_complex.cpp
new file mode 100644
index 000000000000..15434d907605
--- /dev/null
+++ b/clang/test/Headers/nvptx_device_math_complex.cpp
@@ -0,0 +1,27 @@
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include <complex>
+
+// CHECK-DAG: define {{.*}} @__mulsc3
+// CHECK-DAG: define {{.*}} @__muldc3
+// CHECK-DAG: define {{.*}} @__divsc3
+// CHECK-DAG: define {{.*}} @__divdc3
+
+// CHECK-DAG: call float @__nv_scalbnf(
+void test_scmplx(std::complex<float> a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}
+
+// CHECK-DAG: call double @__nv_scalbn(
+void test_dcmplx(std::complex<double> a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}