[clang] 01d3932 - [Clang][HIP] Include `__clang_cuda_math_forward_declares.h` before `<cmath>` (#201563)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 9 03:20:10 PDT 2026
Author: Juan Manuel Martinez CaamaƱo
Date: 2026-06-09T12:20:05+02:00
New Revision: 01d3932364bee33f8e861d5664c2983cc855124f
URL: https://github.com/llvm/llvm-project/commit/01d3932364bee33f8e861d5664c2983cc855124f
DIFF: https://github.com/llvm/llvm-project/commit/01d3932364bee33f8e861d5664c2983cc855124f.diff
LOG: [Clang][HIP] Include `__clang_cuda_math_forward_declares.h` before `<cmath>` (#201563)
In HIP, `constexpr` functions are treated as both `__host__` and
`__device__`.
A new version of the MS STL shipped with the build tools version
14.51.36231 has `constexpr` definitions for some `cmath` functions when
the
compiler in use is Clang (this gets worse when C++23 is in use).
These definitions conflict with the `__device__` declarations we provide
in the header wrappers.
There is a workaround for this: We do not mark `constexpr`
functions [_that are defined in a system
header_](https://github.com/llvm/llvm-project/blob/03127a03860b9d8cb440fe8f51c00647f45eb8be/clang/lib/Sema/SemaCUDA.cpp#L877)
as
`__host__` and `__device__` if there is a previous `__device__`
declaration.
By moving `__clang_cuda_math_forward_declares.h` before `<cmath>` is
included we're able to benefit from this behavior.
This fixes error like this one
https://github.com/ggml-org/llama.cpp/issues/22570
even for recent versions of C++.
This patch should address C++23 concerns where more functions are marked
as `constexpr`.
However, we should still figure out how to provide `__device__
constexpr` versions for those functions, or not provide ours and use the
system's if they exist.
This patch replaces https://github.com/llvm/llvm-project/pull/200395
Added:
clang/test/Headers/hip-constexpr-cmath.hip
Modified:
clang/lib/Headers/__clang_hip_runtime_wrapper.h
Removed:
################################################################################
diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
index 19ce7a5d2c86b..4b8cffd86f044 100644
--- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -111,6 +111,12 @@ __attribute__((weak)) inline __device__ void free(void *__ptr) {
#endif //__cplusplus
#if !defined(__HIPCC_RTC__)
+// We must include the forward declarations before cmath is included.
+// Otherwise `constexpr` functions in <cmath> would be implicitely __host__
+// __device__. Declaring the __device__ verison before allows us to overload
+// them with __device__ versions (this behavour is only valid for system
+// headers).
+#include <__clang_cuda_math_forward_declares.h>
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
@@ -144,7 +150,6 @@ typedef __SIZE_TYPE__ size_t;
#if defined(__HIPCC_RTC__)
#include <__clang_hip_cmath.h>
#else
-#include <__clang_cuda_math_forward_declares.h>
#include <__clang_hip_cmath.h>
#include <__clang_cuda_complex_builtins.h>
#include <algorithm>
diff --git a/clang/test/Headers/hip-constexpr-cmath.hip b/clang/test/Headers/hip-constexpr-cmath.hip
new file mode 100644
index 0000000000000..8b2c3187bf82c
--- /dev/null
+++ b/clang/test/Headers/hip-constexpr-cmath.hip
@@ -0,0 +1,70 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+//
+// Test with the pre-202604 stl
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN: -internal-isystem %t/stl/include \
+// RUN: -internal-isystem %t/stl/2025/include \
+// RUN: -internal-isystem %S/Inputs/include \
+// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN: -target-cpu gfx906 -verify %t/main.hip -fcuda-is-device -o /dev/null
+//
+// Test with the 202604 stl
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN: -internal-isystem %t/stl/include \
+// RUN: -internal-isystem %t/stl/2026/include \
+// RUN: -internal-isystem %S/Inputs/include \
+// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN: -target-cpu gfx906 -verify %t/main.hip -fcuda-is-device -o /dev/null
+
+#--- stl/include/stl_common
+_CLANG_BUILTIN2(isunordered)
+_CLANG_BUILTIN2(isgreater)
+_CLANG_BUILTIN2(isgreaterequal)
+_CLANG_BUILTIN2(isless)
+_CLANG_BUILTIN2(islessequal)
+_CLANG_BUILTIN2(islessgreater)
+
+#define FP_NAN 0
+#define FP_INFINITE 1
+#define FP_ZERO 2
+#define FP_SUBNORMAL 3
+#define FP_NORMAL 4
+#--- stl/2025/include/version
+#define _MSVC_STL_UPDATE 202508L
+#--- stl/2025/include/cmath
+#define _CLANG_BUILTIN2(NAME) \
+ bool NAME(float x, float y) noexcept { \
+ return __builtin_##NAME(x, y); \
+ }
+
+#include <stl_common>
+#--- stl/2026/include/version
+#define _MSVC_STL_UPDATE 202604L
+#--- stl/2026/include/cmath
+#define _CLANG_BUILTIN2(NAME) \
+ bool constexpr NAME(float x, float y) noexcept { \
+ return __builtin_##NAME(x, y); \
+ }
+
+#include <stl_common>
+#--- main.hip
+// expected-no-diagnostics
+
+#include <cmath>
+
+#define TEST_CMP(NAME) \
+ bool __attribute__((device)) test_device_ ## NAME (float x) { \
+ bool b = NAME(x, x); \
+ return b; \
+ }
+
+TEST_CMP(isunordered)
+TEST_CMP(isgreater)
+TEST_CMP(isgreaterequal)
+TEST_CMP(isless)
+TEST_CMP(islessequal)
+TEST_CMP(islessgreater)
More information about the cfe-commits
mailing list