[libclc] libclc: Unify fast FMA controls (PR #188244)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 24 06:29:09 PDT 2026
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/188244
This was defined in multiple places with different names. Consolidate
on one, with a gentype wrapper for it. Also set the value based on the
standard FP_FAST_FMA* macros.
>From 79970465a30bd0262b6e2fa9e36069d51d30f2ad Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 24 Mar 2026 13:57:02 +0100
Subject: [PATCH] libclc: Unify fast FMA controls
This was defined in multiple places with different names. Consolidate
on one, with a gentype wrapper for it. Also set the value based on the
standard FP_FAST_FMA* macros.
---
libclc/clc/include/clc/math/gentype.inc | 6 ++++++
libclc/clc/include/clc/math/math.h | 19 +++++++++++++++++--
.../lib/generic/math/clc_sincos_helpers.inc | 4 ++--
3 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/libclc/clc/include/clc/math/gentype.inc b/libclc/clc/include/clc/math/gentype.inc
index ff5b77e029399..ad676e06e0385 100644
--- a/libclc/clc/include/clc/math/gentype.inc
+++ b/libclc/clc/include/clc/math/gentype.inc
@@ -84,6 +84,7 @@
#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE)
#define __CLC_GENTYPE_DENORMS_ARE_ZERO __clc_denormals_are_zero_fp32()
+#define __CLC_GENTYPE_FAST_FMA __CLC_FAST_FMA_F32
#define __CLC_BIT_INT int
#define __CLC_SCALAR
@@ -124,6 +125,7 @@
#undef __CLC_GENTYPE_MIN
#undef __CLC_GENTYPE_TRUE_MIN
#undef __CLC_GENTYPE_DENORMS_ARE_ZERO
+#undef __CLC_GENTYPE_FAST_FMA
#undef __CLC_FP_LIT
#undef __CLC_FPSIZE
#undef __CLC_SCALAR_GENTYPE
@@ -141,6 +143,7 @@
#define __CLC_GENTYPE_MIN (__CLC_GENTYPE) DBL_MIN
#define __CLC_GENTYPE_TRUE_MIN (__CLC_GENTYPE) DBL_TRUE_MIN
#define __CLC_GENTYPE_DENORMS_ARE_ZERO __clc_denormals_are_zero_fp64()
+#define __CLC_GENTYPE_FAST_FMA __CLC_FAST_FMA_F64
#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE)
#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE)
@@ -184,6 +187,7 @@
#undef __CLC_GENTYPE_MIN
#undef __CLC_GENTYPE_TRUE_MIN
#undef __CLC_GENTYPE_DENORMS_ARE_ZERO
+#undef __CLC_GENTYPE_FAST_FMA
#undef __CLC_FP_LIT
#undef __CLC_FPSIZE
#undef __CLC_SCALAR_GENTYPE
@@ -201,6 +205,7 @@
#define __CLC_GENTYPE_MIN (__CLC_GENTYPE) HALF_MIN
#define __CLC_GENTYPE_TRUE_MIN (__CLC_GENTYPE) HALF_TRUE_MIN
#define __CLC_GENTYPE_DENORMS_ARE_ZERO __clc_denormals_are_zero_fp16()
+#define __CLC_GENTYPE_FAST_FMA __CLC_FAST_FMA_F16
#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE)
#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
@@ -244,6 +249,7 @@
#undef __CLC_GENTYPE_MIN
#undef __CLC_GENTYPE_TRUE_MIN
#undef __CLC_GENTYPE_DENORMS_ARE_ZERO
+#undef __CLC_GENTYPE_FAST_FMA
#undef __CLC_FP_LIT
#undef __CLC_FPSIZE
#undef __CLC_SCALAR_GENTYPE
diff --git a/libclc/clc/include/clc/math/math.h b/libclc/clc/include/clc/math/math.h
index a676c357f4633..22ed3f9defcbe 100644
--- a/libclc/clc/include/clc/math/math.h
+++ b/libclc/clc/include/clc/math/math.h
@@ -24,10 +24,25 @@
#define PNOR 0x100
#define PINF 0x200
-#define __CLC_HAVE_HW_FMA32() (1)
+#ifdef FP_FAST_FMA_HALF
+#define __CLC_FAST_FMA_F16 1
+#else
+#define __CLC_FAST_FMA_F16 0
+#endif
+
+#ifdef FP_FAST_FMAF
+#define __CLC_FAST_FMA_F32 1
+#else
+#define __CLC_FAST_FMA_F32 0
+#endif
+
+#ifdef FP_FAST_FMA
+#define __CLC_FAST_FMA_F64 1
+#else
+#define __CLC_FAST_FMA_F64 0
+#endif
#define HAVE_BITALIGN() (0)
-#define HAVE_FAST_FMA32() (0)
#define MATH_DIVIDE(X, Y) ((X) / (Y))
#define MATH_RECIP(X) (1.0f / (X))
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index 2603f9f46de1a..de82df684e1e1 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -128,7 +128,7 @@ _CLC_DEF _CLC_OVERLOAD void __clc_fullMulS(private __CLC_FLOATN *hi,
private __CLC_FLOATN *lo,
__CLC_FLOATN a, __CLC_FLOATN b,
__CLC_FLOATN bh, __CLC_FLOATN bt) {
- if (__CLC_HAVE_HW_FMA32()) {
+ if (__CLC_FAST_FMA_F32) {
__CLC_FLOATN ph = a * b;
*hi = ph;
*lo = __clc_fma(a, b, -ph);
@@ -305,7 +305,7 @@ __clc_argReductionLargeS(private __CLC_FLOATN *r, __CLC_FLOATN x) {
__CLC_FLOATN rh, rt;
- if (__CLC_HAVE_HW_FMA32()) {
+ if (__CLC_FAST_FMA_F32) {
rh = q1 * pio2h;
rt = __clc_fma(q0, pio2h, __clc_fma(q1, pio2t, __clc_fma(q1, pio2h, -rh)));
} else {
More information about the cfe-commits
mailing list