[clang] Implement macro poisoning for foreign CUDA headers from Nvidia Toolkit (PR #187696)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 20 06:10:16 PDT 2026
https://github.com/fenodem updated https://github.com/llvm/llvm-project/pull/187696
>From f3af2cc29fb5109416c3f7a847c097e644e8eeea Mon Sep 17 00:00:00 2001
From: fenodem <fenodem at protonmail.com>
Date: Fri, 20 Mar 2026 09:34:28 +0000
Subject: [PATCH 1/4] Update __clang_cuda_math_forward_declares.h
---
.../__clang_cuda_math_forward_declares.h | 65 ++++++++++++-------
1 file changed, 40 insertions(+), 25 deletions(-)
diff --git a/clang/lib/Headers/__clang_cuda_math_forward_declares.h b/clang/lib/Headers/__clang_cuda_math_forward_declares.h
index 45fe1e5b1772d..d8091ea77ff49 100644
--- a/clang/lib/Headers/__clang_cuda_math_forward_declares.h
+++ b/clang/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -12,12 +12,23 @@
#error "This file is for CUDA/HIP compilation only."
#endif
-// This file forward-declares of some math functions we (or the CUDA headers)
-// will define later. We need to do this, and do it before cmath is included,
-// because the standard library may have constexpr math functions. In the
-// absence of a prior __device__ decl, those constexpr functions may become
-// implicitly host+device. host+device functions can't be overloaded, so that
-// would preclude the use of our own __device__ overloads for these functions.
+// PURPOSE: Forward-declare __device__ math functions before <cmath> is included.
+// Prevents standard library constexpr functions from becoming implicit
+// __host__ __device__, which would clash with our __device__ overloads.
+
+// ---------------------------------------------------------------------------
+// Return Type: CUDA headers return 'bool' on MSVC, but 'int' on POSIX.
+// Mismatches here cause "functions differ only in return type" errors.
+// ---------------------------------------------------------------------------
+// CORRECTED: Force 'int' for all CUDA compilations to match CUDA SDK headers
+// (math_functions.hpp), which define these as returning int regardless of host.
+// ---------------------------------------------------------------------------
+// Every compilation mode (__CUDA__, __OPENMP_NVPTX__, and the fallback) uses
+// the same return type, so define it unconditionally; the macro is kept so a
+// future per-ABI divergence only needs to touch this one place.
+// ---------------------------------------------------------------------------
+#define __CUDA_CLASSIFIER_RET_TYPE int
+
#pragma push_macro("__DEVICE__")
#define __DEVICE__ \
@@ -89,31 +100,38 @@ __DEVICE__ double hypot(double, double);
__DEVICE__ float hypot(float, float);
__DEVICE__ int ilogb(double);
__DEVICE__ int ilogb(float);
-#ifdef _MSC_VER
-__DEVICE__ bool isfinite(long double);
+
+// ---------------------------------------------------------------------------
+// Classification Functions
+// ---------------------------------------------------------------------------
+// Note: We declare long double versions here if not MSVC to match
+// __clang_cuda_cmath.h logic, but they require implementations in
+// __clang_cuda_device_functions.h to avoid link errors.
+#if !defined(_MSC_VER)
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isfinite(long double);
#endif
-__DEVICE__ bool isfinite(double);
-__DEVICE__ bool isfinite(float);
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isfinite(double);
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isfinite(float);
__DEVICE__ bool isgreater(double, double);
__DEVICE__ bool isgreaterequal(double, double);
__DEVICE__ bool isgreaterequal(float, float);
__DEVICE__ bool isgreater(float, float);
-#ifdef _MSC_VER
-__DEVICE__ bool isinf(long double);
+#if !defined(_MSC_VER)
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isinf(long double);
#endif
-__DEVICE__ bool isinf(double);
-__DEVICE__ bool isinf(float);
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isinf(double);
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isinf(float);
__DEVICE__ bool isless(double, double);
__DEVICE__ bool islessequal(double, double);
__DEVICE__ bool islessequal(float, float);
__DEVICE__ bool isless(float, float);
__DEVICE__ bool islessgreater(double, double);
__DEVICE__ bool islessgreater(float, float);
-#ifdef _MSC_VER
-__DEVICE__ bool isnan(long double);
+#if !defined(_MSC_VER)
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isnan(long double);
#endif
-__DEVICE__ bool isnan(double);
-__DEVICE__ bool isnan(float);
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isnan(double);
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isnan(float);
__DEVICE__ bool isnormal(double);
__DEVICE__ bool isnormal(float);
__DEVICE__ bool isunordered(double, double);
@@ -165,11 +183,11 @@ __DEVICE__ double scalbln(double, long);
__DEVICE__ float scalbln(float, long);
__DEVICE__ double scalbn(double, int);
__DEVICE__ float scalbn(float, int);
-#ifdef _MSC_VER
-__DEVICE__ bool signbit(long double);
+#if !defined(_MSC_VER)
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE signbit(long double);
#endif
-__DEVICE__ bool signbit(double);
-__DEVICE__ bool signbit(float);
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE signbit(double);
+__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE signbit(float);
__DEVICE__ double sin(double);
__DEVICE__ float sin(float);
__DEVICE__ double sinh(double);
@@ -185,9 +203,6 @@ __DEVICE__ float tgamma(float);
__DEVICE__ double trunc(double);
__DEVICE__ float trunc(float);
-// Notably missing above is nexttoward, which we don't define on
-// the device side because libdevice doesn't give us an implementation, and we
-// don't want to be in the business of writing one ourselves.
// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
>From a291032af6d170b921e028555c6f257ba47eece9 Mon Sep 17 00:00:00 2001
From: fenodem <fenodem at protonmail.com>
Date: Fri, 20 Mar 2026 09:48:57 +0000
Subject: [PATCH 2/4] Update __clang_cuda_device_functions.h
---
.../Headers/__clang_cuda_device_functions.h | 79 ++++++++++++++-----
1 file changed, 61 insertions(+), 18 deletions(-)
diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h
index 0226fe95abab6..4658c92d37206 100644
--- a/clang/lib/Headers/__clang_cuda_device_functions.h
+++ b/clang/lib/Headers/__clang_cuda_device_functions.h
@@ -223,11 +223,65 @@ __DEVICE__ float __fdividef(float __a, float __b) {
}
__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }
__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
-__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
-__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
-#ifdef _MSC_VER
-__DEVICE__ int __finitel(long double __a);
-#endif
+
+// ---------------------------------------------------------------------------
+// Classification Function Internal Names
+// ---------------------------------------------------------------------------
+// WARNING: Do NOT consolidate these functions. CUDA's math_functions.hpp calls
+// distinct names (e.g., __signbit vs __signbitd). Removing one causes
+// "no matching function" errors.
+//
+// Note: We use __inline__ without 'static'. In C++ this gives the functions
+// inline (externally visible, ODR-merged) linkage, matching CUDA headers that
+// declare them 'extern' on GCC/MinGW, while still allowing inlining.
+// ---------------------------------------------------------------------------
+
+// Float implementations
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __finitef(float __a) { return __builtin_isfinite(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __isinff(float __a) { return __builtin_isinf(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __isnanf(float __a) { return __builtin_isnan(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __signbitf(float __a) { return __builtin_signbit(__a); }
+
+// Double implementations
+// Note: Both __finite and __isfinited are defined because CUDA headers
+// reference distinct names in different contexts (similar to __signbit/__signbitd).
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __finite(double __a) { return __builtin_isfinite(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __isfinited(double __a) { return __builtin_isfinite(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __isinf(double __a) { return __builtin_isinf(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __isnan(double __a) { return __builtin_isnan(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __signbit(double __a) { return __builtin_signbit(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __signbitd(double __a) { return __builtin_signbit(__a); }
+
+// Long double implementations (UNGUARDED - intentional)
+// IMPORTANT: Do NOT cast to double. Clang's builtins natively support long double.
+// Casting causes incorrect results on MinGW/Linux where long double has higher
+// precision than double (e.g. finite values that overflow double).
+// NOTE: Clang does NOT support __builtin_isfinitel. Using suffixed builtins
+// will fail. The generic builtin preserves precision for 80-bit long double
+// on MinGW hosts and handles double demotion on devices automatically.
+// NOTE: Do NOT add #if !defined(_MSC_VER) here. Unlike wrappers, these
+// are __inline__ with distinct names (__finitel vs __finite).
+// They have no linker visibility and are optimized away if unused.
+// CUDA headers may call these on any platform - define unconditionally.
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __finitel(long double __a) { return __builtin_isfinite(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __isinfl(long double __a) { return __builtin_isinf(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __isnanl(long double __a) { return __builtin_isnan(__a); }
+__inline__ __host__ __device__ __attribute__((always_inline))
+int __signbitl(long double __a) { return __builtin_signbit(__a); }
+
__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
@@ -433,17 +487,7 @@ __DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }
__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }
__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }
__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }
-__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
-__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
-__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
-#ifdef _MSC_VER
-__DEVICE__ int __isinfl(long double __a);
-#endif
-__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
-__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
-#ifdef _MSC_VER
-__DEVICE__ int __isnanl(long double __a);
-#endif
+
__DEVICE__ double __ll2double_rd(long long __a) {
return __nv_ll2double_rd(__a);
}
@@ -515,8 +559,7 @@ __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
return __nv_sad(__a, __b, __c);
}
__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }
-__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }
-__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }
+
__DEVICE__ void __sincosf(float __a, float *__s, float *__c) {
return __nv_fast_sincosf(__a, __s, __c);
}
>From eaa6e8296429c55b7d59f425a781a4b27a906b11 Mon Sep 17 00:00:00 2001
From: fenodem <fenodem at protonmail.com>
Date: Fri, 20 Mar 2026 10:01:39 +0000
Subject: [PATCH 3/4] Update __clang_cuda_cmath.h
---
clang/lib/Headers/__clang_cuda_cmath.h | 81 +++++++++++++-------------
1 file changed, 41 insertions(+), 40 deletions(-)
diff --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h
index 5bbb59a93b9e5..b78cad5f94544 100644
--- a/clang/lib/Headers/__clang_cuda_cmath.h
+++ b/clang/lib/Headers/__clang_cuda_cmath.h
@@ -65,54 +65,57 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
-// For inscrutable reasons, the CUDA headers define these functions for us on
-// Windows.
-#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)
-
-// For OpenMP we work around some old system headers that have non-conforming
-// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
-// this by providing two versions of these functions, differing only in the
-// return type. To avoid conflicting definitions we disable implicit base
-// function generation. That means we will end up with two specializations, one
-// per type, but only one has a base function defined by the system header.
+// ---------------------------------------------------------------------------
+// Standard Classification Functions
+// ---------------------------------------------------------------------------
+// OpenMP variants return 'int' (legacy compatibility).
+// Base functions return __CUDA_CLASSIFIER_RET_TYPE (currently 'int' everywhere).
+// ---------------------------------------------------------------------------
+
#if defined(__OPENMP_NVPTX__)
#pragma omp begin declare variant match( \
implementation = {extension(disable_implicit_base)})
-// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
-// add a suffix. This means we would clash with the names of the variants
-// (note that we do not create implicit base functions here). To avoid
-// this clash we add a new trait to some of them that is always true
-// (this is LLVM after all ;)). It will only influence the mangled name
-// of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})
-__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
-__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+// OpenMP path: Return 'int' for legacy compatibility.
+static __host__ __device__ int isinf(float __x) { return ::__isinff(__x); }
+static __host__ __device__ int isinf(double __x) { return ::__isinf(__x); }
+static __host__ __device__ int isfinite(float __x) { return ::__finitef(__x); }
+static __host__ __device__ int isfinite(double __x) { return ::__isfinited(__x); }
+static __host__ __device__ int isnan(float __x) { return ::__isnanf(__x); }
+static __host__ __device__ int isnan(double __x) { return ::__isnan(__x); }
+static __host__ __device__ int signbit(float __x) { return ::__signbitf(__x); }
+static __host__ __device__ int signbit(double __x) { return ::__signbitd(__x); }
#pragma omp end declare variant
-#endif
-
-__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
-// For inscrutable reasons, __finite(), the double-precision version of
-// __finitef, does not exist when compiling for MacOS. __isfinited is available
-// everywhere and is just as good.
-__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
-__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
-
-#if defined(__OPENMP_NVPTX__)
#pragma omp end declare variant
-#endif
+#else // !__OPENMP_NVPTX__
+
+// Base path (CUDA): Return type matches __CUDA_CLASSIFIER_RET_TYPE, which is
+// currently 'int' on every platform (see __clang_cuda_math_forward_declares.h).
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isinf(float __x) { return ::__isinff(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isinf(double __x) { return ::__isinf(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isfinite(float __x) { return ::__finitef(__x); }
+// MacOS: __finite is unavailable; __isfinited works everywhere.
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isfinite(double __x) { return ::__isfinited(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isnan(float __x) { return ::__isnanf(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isnan(double __x) { return ::__isnan(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE signbit(float __x) { return ::__signbitf(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE signbit(double __x) { return ::__signbitd(__x); }
+
+// Long double support (MinGW/Linux only).
+// Long double wrappers (MSVC-guarded - intentional)
+// On MSVC, long double == double, causing overload conflicts.
+#if !defined(_MSC_VER)
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isfinite(long double __x) { return ::__finitel(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isinf(long double __x) { return ::__isinfl(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isnan(long double __x) { return ::__isnanl(__x); }
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE signbit(long double __x) { return ::__signbitl(__x); }
+#endif // !_MSC_VER
-#endif
+#endif // __OPENMP_NVPTX__
__DEVICE__ bool isgreater(float __x, float __y) {
return __builtin_isgreater(__x, __y);
@@ -167,8 +170,6 @@ __DEVICE__ float pow(float __base, int __iexp) {
__DEVICE__ double pow(double __base, int __iexp) {
return ::powi(__base, __iexp);
}
-__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
-__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
@@ -289,7 +290,7 @@ __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
-__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
>From ff13ec335dace51851cb803b13180505deb52c4e Mon Sep 17 00:00:00 2001
From: fenodem <fenodem at protonmail.com>
Date: Fri, 20 Mar 2026 12:26:24 +0000
Subject: [PATCH 4/4] Update __clang_cuda_runtime_wrapper.h
---
.../Headers/__clang_cuda_runtime_wrapper.h | 65 ++++++++++++++++++-
1 file changed, 63 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index 295f4191f9927..ee1313092d4c6 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -39,12 +39,45 @@
// Include some standard headers to avoid CUDA headers including them
// while some required macros (like __THROW) are in a weird state.
#include <climits>
+
+// ---------------------------------------------------------------------------
+// MinGW (GCC) Compatibility Fix
+// ---------------------------------------------------------------------------
+// MinGW's math.h declares internal names like __isnanf, __signbitf as
+// __host__ functions. This conflicts with our __host__ __device__ definitions.
+// We rename them out of the way before including <cmath>.
+// ---------------------------------------------------------------------------
+#define __isnanf __mingw_hidden_isnanf
+#define __isinf __mingw_hidden_isinf
+#define __isinff __mingw_hidden_isinff
+#define __finite __mingw_hidden_finite
+#define __finitef __mingw_hidden_finitef
+#define __signbit __mingw_hidden_signbit
+#define __signbitf __mingw_hidden_signbitf
+#define __isnanl __mingw_hidden_isnanl
+#define __isinfl __mingw_hidden_isinfl
+#define __finitel __mingw_hidden_finitel
+#define __signbitl __mingw_hidden_signbitl
+
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
#include <string.h>
#undef __CUDACC__
+// Restore the names so we can use them for our own definitions.
+#undef __isnanf
+#undef __isinf
+#undef __isinff
+#undef __finite
+#undef __finitef
+#undef __signbit
+#undef __signbitf
+#undef __isnanl
+#undef __isinfl
+#undef __finitel
+#undef __signbitl
+
// math_functions.h from CUDA 13.2+ defines _NV_RSQRT_SPECIFIER.
// Clang does not include it, so we need to define it ourselves.
#if defined(__GNUC__) && defined(__GLIBC_PREREQ)
@@ -218,12 +251,31 @@ inline __host__ double __signbitd(double x) {
#define __USE_FAST_MATH__ 1
#endif
+// ---------------------------------------------------------------------------
+// Macro Poisoning - Universal (NOT Platform-Specific)
+// ---------------------------------------------------------------------------
+// Poison the standard names to prevent ODR violations or incorrect overloads
+// from CUDA headers.
+// ---------------------------------------------------------------------------
+#define isfinite __cuda_disabled_isfinite
+#define isinf __cuda_disabled_isinf
+#define isnan __cuda_disabled_isnan
+#define signbit __cuda_disabled_signbit
+
#if CUDA_VERSION >= 9000
#include "crt/math_functions.hpp"
#else
#include "math_functions.hpp"
#endif
+// ---------------------------------------------------------------------------
+// Macro Restoration
+// ---------------------------------------------------------------------------
+#undef isfinite
+#undef isinf
+#undef isnan
+#undef signbit
+
#pragma pop_macro("__USE_FAST_MATH__")
#if CUDA_VERSION < 9000
@@ -342,7 +394,11 @@ __DEVICE__ unsigned int __isLocal(const void *p) {
// conditional on __GNUC__. :)
#pragma push_macro("signbit")
#pragma push_macro("__GNUC__")
-#undef __GNUC__
+#ifndef __GNUC__
+#define __GNUC__ 4
+#define __CLANG_CUDA_DEFINED_GNUC
+#endif
+
#define signbit __ignored_cuda_signbit
// CUDA-9 omits device-side definitions of some math functions if it sees
@@ -365,6 +421,12 @@ __DEVICE__ unsigned int __isLocal(const void *p) {
#endif
#pragma pop_macro("_GLIBCXX_MATH_H")
#pragma pop_macro("_LIBCPP_VERSION")
+
+// Restore original __GNUC__ state
+#ifdef __CLANG_CUDA_DEFINED_GNUC
+#undef __GNUC__
+#undef __CLANG_CUDA_DEFINED_GNUC
+#endif
#pragma pop_macro("__GNUC__")
#pragma pop_macro("signbit")
@@ -505,7 +567,6 @@ __device__ inline __cuda_builtin_gridDim_t::operator uint3() const {
#include "curand_mtgp32_kernel.h"
#pragma pop_macro("dim3")
#pragma pop_macro("uint3")
-#pragma pop_macro("__USE_FAST_MATH__")
#pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
// CUDA runtime uses this undocumented function to access kernel launch
More information about the cfe-commits
mailing list