[libclc] Refine __clc_fp*_subnormals_supported and __clc_flush_denormal_if_not_supported (PR #157633)
Wenju He via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 3 00:04:23 PDT 2025
https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/157633
>From 7e2d210d9c6cd20c342562a44c2e4d2cb238e229 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Tue, 9 Sep 2025 11:05:02 +0200
Subject: [PATCH 1/8] [libclc] Refine __clc_fp*_subnormals_supported and
__clc_flush_denormal_if_not_supported
Remove the dependency on the libclc build-time configuration for
__clc_fp*_subnormals_supported. The check is now implemented with LLVM
intrinsics so it can be resolved during target lowering or at runtime.
Improve the __clc_flush_denormal_if_not_supported implementation as well.
It no longer calls __clc_fp*_subnormals_supported, which relies on
llvm.canonicalize and therefore quiets sNaN; as a result, the new
implementation is more foldable.
Remove the CMake option ENABLE_RUNTIME_SUBNORMAL and its related code.
Resolves #153148
Co-authored-by: Matt Arsenault <Matthew.Arsenault at amd.com>
---
libclc/CMakeLists.txt | 18 --------
.../include/clc/math/clc_subnormal_config.h | 1 -
libclc/clc/include/clc/math/math.h | 13 ++----
libclc/clc/lib/generic/SOURCES | 1 +
libclc/clc/lib/generic/math/clc_exp10.cl | 1 -
libclc/clc/lib/generic/math/clc_hypot.cl | 1 -
libclc/clc/lib/generic/math/clc_pow.cl | 1 -
libclc/clc/lib/generic/math/clc_pown.cl | 1 -
libclc/clc/lib/generic/math/clc_powr.cl | 1 -
libclc/clc/lib/generic/math/clc_remquo.cl | 1 -
.../lib/generic/math/clc_subnormal_config.cl | 46 +++++++++++++++++++
libclc/opencl/lib/clspv/SOURCES | 1 -
libclc/opencl/lib/clspv/subnormal_config.cl | 16 -------
libclc/opencl/lib/generic/SOURCES | 2 -
libclc/opencl/lib/generic/subnormal_config.cl | 18 --------
.../opencl/lib/generic/subnormal_disable.ll | 9 ----
.../lib/generic/subnormal_helper_func.ll | 16 -------
.../lib/generic/subnormal_use_default.ll | 9 ----
libclc/opencl/lib/spirv/SOURCES | 1 -
libclc/opencl/lib/spirv/subnormal_config.cl | 16 -------
20 files changed, 52 insertions(+), 121 deletions(-)
create mode 100644 libclc/clc/lib/generic/math/clc_subnormal_config.cl
delete mode 100644 libclc/opencl/lib/clspv/subnormal_config.cl
delete mode 100644 libclc/opencl/lib/generic/subnormal_config.cl
delete mode 100644 libclc/opencl/lib/generic/subnormal_disable.ll
delete mode 100644 libclc/opencl/lib/generic/subnormal_helper_func.ll
delete mode 100644 libclc/opencl/lib/generic/subnormal_use_default.ll
delete mode 100644 libclc/opencl/lib/spirv/subnormal_config.cl
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index c75f450d8d3ad..572556034e66c 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -41,8 +41,6 @@ set( LIBCLC_MIN_LLVM 3.9.0 )
set( LIBCLC_TARGETS_TO_BUILD "all"
CACHE STRING "Semicolon-separated list of libclc targets to build, or 'all'." )
-option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." OFF )
-
option(
LIBCLC_USE_SPIRV_BACKEND "Build SPIR-V targets with the SPIR-V backend." OFF
)
@@ -231,19 +229,6 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
configure_file( libclc.pc.in libclc.pc @ONLY )
install( FILES ${CMAKE_CURRENT_BINARY_DIR}/libclc.pc DESTINATION "${CMAKE_INSTALL_DATADIR}/pkgconfig" )
-if( ENABLE_RUNTIME_SUBNORMAL )
- foreach( file IN ITEMS subnormal_use_default subnormal_disable )
- link_bc(
- TARGET ${file}
- INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/${file}.ll
- )
- install(
- FILES $<TARGET_PROPERTY:${file},TARGET_FILE>
- DESTINATION "${CMAKE_INSTALL_DATADIR}/clc"
- )
- endforeach()
-endif()
-
find_package( Python3 REQUIRED COMPONENTS Interpreter )
file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/utils/gen_convert.py script_loc )
add_custom_command(
@@ -371,9 +356,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
list( APPEND opencl_gen_files clspv-convert.cl )
else()
list( APPEND opencl_gen_files convert.cl )
- if ( NOT ENABLE_RUNTIME_SUBNORMAL )
- list( APPEND opencl_lib_files opencl/lib/generic/subnormal_use_default.ll )
- endif()
endif()
endif()
diff --git a/libclc/clc/include/clc/math/clc_subnormal_config.h b/libclc/clc/include/clc/math/clc_subnormal_config.h
index 14693ed01e033..e44ec1958b101 100644
--- a/libclc/clc/include/clc/math/clc_subnormal_config.h
+++ b/libclc/clc/include/clc/math/clc_subnormal_config.h
@@ -10,7 +10,6 @@
#include <clc/clcfunc.h>
-_CLC_DECL bool __clc_subnormals_disabled();
_CLC_DECL bool __clc_fp16_subnormals_supported();
_CLC_DECL bool __clc_fp32_subnormals_supported();
_CLC_DECL bool __clc_fp64_subnormals_supported();
diff --git a/libclc/clc/include/clc/math/math.h b/libclc/clc/include/clc/math/math.h
index c2647f66b4006..2db5d187c88ce 100644
--- a/libclc/clc/include/clc/math/math.h
+++ b/libclc/clc/include/clc/math/math.h
@@ -11,7 +11,6 @@
#include <clc/clc_as_type.h>
#include <clc/clcfunc.h>
-#include <clc/math/clc_subnormal_config.h>
#define SNAN 0x001
#define QNAN 0x002
@@ -66,13 +65,11 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
#define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32)
_CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
- int ix = __clc_as_int(x);
- if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) &&
- ((ix & MANTBITS_SP32) != 0)) {
- ix &= SIGNBIT_SP32;
- x = __clc_as_float(ix);
- }
- return x;
+ // Avoid calling __clc_fp32_subnormals_supported here: it uses
+ // llvm.canonicalize, which quiets sNaN.
+ return __builtin_fabsf(x) < 0x1p-149f
+ ? __builtin_elementwise_copysign(0.0f, x)
+ : x;
}
#ifdef cl_khr_fp64
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index ee4f771799e8e..4a6dadc702033 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -137,6 +137,7 @@ math/clc_sincos_helpers.cl
math/clc_sinh.cl
math/clc_sinpi.cl
math/clc_sqrt.cl
+math/clc_subnormal_config.cl
math/clc_sw_fma.cl
math/clc_tables.cl
math/clc_tan.cl
diff --git a/libclc/clc/lib/generic/math/clc_exp10.cl b/libclc/clc/lib/generic/math/clc_exp10.cl
index 0c394ee19475a..fb33367851fda 100644
--- a/libclc/clc/lib/generic/math/clc_exp10.cl
+++ b/libclc/clc/lib/generic/math/clc_exp10.cl
@@ -11,7 +11,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_isnan.h>
diff --git a/libclc/clc/lib/generic/math/clc_hypot.cl b/libclc/clc/lib/generic/math/clc_hypot.cl
index c934ab29da91b..fd046bccaed51 100644
--- a/libclc/clc/lib/generic/math/clc_hypot.cl
+++ b/libclc/clc/lib/generic/math/clc_hypot.cl
@@ -12,7 +12,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_mad.h>
#include <clc/math/clc_sqrt.h>
-#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/relational/clc_isnan.h>
#include <clc/shared/clc_clamp.h>
diff --git a/libclc/clc/lib/generic/math/clc_pow.cl b/libclc/clc/lib/generic/math/clc_pow.cl
index 70d3d614a8d36..c20d3829ea076 100644
--- a/libclc/clc/lib/generic/math/clc_pow.cl
+++ b/libclc/clc/lib/generic/math/clc_pow.cl
@@ -12,7 +12,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_select.h>
diff --git a/libclc/clc/lib/generic/math/clc_pown.cl b/libclc/clc/lib/generic/math/clc_pown.cl
index 5aa9560174b99..cfc415753fd1a 100644
--- a/libclc/clc/lib/generic/math/clc_pown.cl
+++ b/libclc/clc/lib/generic/math/clc_pown.cl
@@ -12,7 +12,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_select.h>
diff --git a/libclc/clc/lib/generic/math/clc_powr.cl b/libclc/clc/lib/generic/math/clc_powr.cl
index 0556ec97d6f3c..c35a3e2c382c5 100644
--- a/libclc/clc/lib/generic/math/clc_powr.cl
+++ b/libclc/clc/lib/generic/math/clc_powr.cl
@@ -12,7 +12,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_select.h>
diff --git a/libclc/clc/lib/generic/math/clc_remquo.cl b/libclc/clc/lib/generic/math/clc_remquo.cl
index fd83ead06d89a..cdebe4922baa0 100644
--- a/libclc/clc/lib/generic/math/clc_remquo.cl
+++ b/libclc/clc/lib/generic/math/clc_remquo.cl
@@ -12,7 +12,6 @@
#include <clc/math/clc_floor.h>
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
-#include <clc/math/clc_subnormal_config.h>
#include <clc/math/clc_trunc.h>
#include <clc/math/math.h>
#include <clc/shared/clc_max.h>
diff --git a/libclc/clc/lib/generic/math/clc_subnormal_config.cl b/libclc/clc/lib/generic/math/clc_subnormal_config.cl
new file mode 100644
index 0000000000000..6be6eb44ae4f4
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_subnormal_config.cl
@@ -0,0 +1,46 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/internal/clc.h>
+#include <clc/math/clc_subnormal_config.h>
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_DEF bool __clc_fp16_subnormals_supported() {
+#ifdef CLC_SPIRV
+ // SPIR-V doesn't support llvm.canonicalize for now.
+ return false;
+#else
+ return !__builtin_isfpclass(__builtin_canonicalizef(0x1p-24h),
+ __FPCLASS_POSZERO);
+#endif
+}
+#endif // cl_khr_fp16
+
+_CLC_DEF bool __clc_fp32_subnormals_supported() {
+#ifdef CLC_SPIRV
+ // SPIR-V doesn't support llvm.canonicalize for now.
+ return false;
+#else
+ return !__builtin_isfpclass(__builtin_canonicalizef(0x1p-149f),
+ __FPCLASS_POSZERO);
+#endif
+}
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_DEF bool __clc_fp64_subnormals_supported() {
+#ifdef CLC_SPIRV
+ // SPIR-V doesn't support llvm.canonicalize for now.
+ return false;
+#else
+ return !__builtin_isfpclass(__builtin_canonicalizef(0x1p-1074),
+ __FPCLASS_POSZERO);
+#endif
+}
+#endif // cl_khr_fp64
diff --git a/libclc/opencl/lib/clspv/SOURCES b/libclc/opencl/lib/clspv/SOURCES
index 0a142ed3e6043..3d9f871ff57ca 100644
--- a/libclc/opencl/lib/clspv/SOURCES
+++ b/libclc/opencl/lib/clspv/SOURCES
@@ -1,6 +1,5 @@
math/fma.cl
shared/vstore_half.cl
-subnormal_config.cl
../generic/geometric/distance.cl
../generic/geometric/length.cl
../generic/math/acos.cl
diff --git a/libclc/opencl/lib/clspv/subnormal_config.cl b/libclc/opencl/lib/clspv/subnormal_config.cl
deleted file mode 100644
index 114aabb2e9435..0000000000000
--- a/libclc/opencl/lib/clspv/subnormal_config.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/math/clc_subnormal_config.h>
-#include <clc/opencl/opencl-base.h>
-
-_CLC_DEF bool __clc_fp16_subnormals_supported() { return false; }
-
-_CLC_DEF bool __clc_fp32_subnormals_supported() { return false; }
-
-_CLC_DEF bool __clc_fp64_subnormals_supported() { return false; }
diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES
index 61757efbcaad7..410fbdee2c71f 100644
--- a/libclc/opencl/lib/generic/SOURCES
+++ b/libclc/opencl/lib/generic/SOURCES
@@ -1,5 +1,3 @@
-subnormal_config.cl
-subnormal_helper_func.ll
async/async_work_group_copy.cl
async/async_work_group_strided_copy.cl
async/prefetch.cl
diff --git a/libclc/opencl/lib/generic/subnormal_config.cl b/libclc/opencl/lib/generic/subnormal_config.cl
deleted file mode 100644
index aa2e30935e5f0..0000000000000
--- a/libclc/opencl/lib/generic/subnormal_config.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/math/clc_subnormal_config.h>
-#include <clc/opencl/opencl-base.h>
-
-_CLC_DEF bool __clc_fp16_subnormals_supported() { return false; }
-
-_CLC_DEF bool __clc_fp32_subnormals_supported() { return false; }
-
-_CLC_DEF bool __clc_fp64_subnormals_supported() {
- return !__clc_subnormals_disabled();
-}
diff --git a/libclc/opencl/lib/generic/subnormal_disable.ll b/libclc/opencl/lib/generic/subnormal_disable.ll
deleted file mode 100644
index 732d09ff09ab4..0000000000000
--- a/libclc/opencl/lib/generic/subnormal_disable.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;;===----------------------------------------------------------------------===;;
-;
-; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-; See https://llvm.org/LICENSE.txt for license information.
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-;
-;;===----------------------------------------------------------------------===;;
-
- at __CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 true
diff --git a/libclc/opencl/lib/generic/subnormal_helper_func.ll b/libclc/opencl/lib/generic/subnormal_helper_func.ll
deleted file mode 100644
index 03beecf979260..0000000000000
--- a/libclc/opencl/lib/generic/subnormal_helper_func.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-;;===----------------------------------------------------------------------===;;
-;
-; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-; See https://llvm.org/LICENSE.txt for license information.
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-;
-;;===----------------------------------------------------------------------===;;
-
- at __CLC_SUBNORMAL_DISABLE = external global i1
-
-define i1 @__clc_subnormals_disabled() #0 {
- %disable = load i1, i1* @__CLC_SUBNORMAL_DISABLE
- ret i1 %disable
-}
-
-attributes #0 = { alwaysinline }
diff --git a/libclc/opencl/lib/generic/subnormal_use_default.ll b/libclc/opencl/lib/generic/subnormal_use_default.ll
deleted file mode 100644
index c648cc0a8aded..0000000000000
--- a/libclc/opencl/lib/generic/subnormal_use_default.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;;===----------------------------------------------------------------------===;;
-;
-; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-; See https://llvm.org/LICENSE.txt for license information.
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-;
-;;===----------------------------------------------------------------------===;;
-
- at __CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 false
diff --git a/libclc/opencl/lib/spirv/SOURCES b/libclc/opencl/lib/spirv/SOURCES
index 0aa923978e9f1..aa7fcee0c4f4a 100644
--- a/libclc/opencl/lib/spirv/SOURCES
+++ b/libclc/opencl/lib/spirv/SOURCES
@@ -1,4 +1,3 @@
-subnormal_config.cl
../generic/async/async_work_group_strided_copy.cl
../generic/async/wait_group_events.cl
../generic/common/degrees.cl
diff --git a/libclc/opencl/lib/spirv/subnormal_config.cl b/libclc/opencl/lib/spirv/subnormal_config.cl
deleted file mode 100644
index 114aabb2e9435..0000000000000
--- a/libclc/opencl/lib/spirv/subnormal_config.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/math/clc_subnormal_config.h>
-#include <clc/opencl/opencl-base.h>
-
-_CLC_DEF bool __clc_fp16_subnormals_supported() { return false; }
-
-_CLC_DEF bool __clc_fp32_subnormals_supported() { return false; }
-
-_CLC_DEF bool __clc_fp64_subnormals_supported() { return false; }
>From 96ec9dc17b214839a8f0fcc58b7dae14f369b839 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Tue, 9 Sep 2025 17:18:40 +0800
Subject: [PATCH 2/8] Apply suggestions from code review
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
libclc/clc/lib/generic/math/clc_subnormal_config.cl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libclc/clc/lib/generic/math/clc_subnormal_config.cl b/libclc/clc/lib/generic/math/clc_subnormal_config.cl
index 6be6eb44ae4f4..dd019789a99cc 100644
--- a/libclc/clc/lib/generic/math/clc_subnormal_config.cl
+++ b/libclc/clc/lib/generic/math/clc_subnormal_config.cl
@@ -16,7 +16,7 @@ _CLC_DEF bool __clc_fp16_subnormals_supported() {
// SPIR-V doesn't support llvm.canonicalize for now.
return false;
#else
- return !__builtin_isfpclass(__builtin_canonicalizef(0x1p-24h),
+ return !__builtin_isfpclass(__builtin_canonicalizef((float)0x1p-24h),
__FPCLASS_POSZERO);
#endif
}
@@ -39,7 +39,7 @@ _CLC_DEF bool __clc_fp64_subnormals_supported() {
// SPIR-V doesn't support llvm.canonicalize for now.
return false;
#else
- return !__builtin_isfpclass(__builtin_canonicalizef(0x1p-1074),
+ return !__builtin_isfpclass(__builtin_canonicalize(0x1p-1074),
__FPCLASS_POSZERO);
#endif
}
>From d52fcdb027f8b45019bc7904aca78d0ff90bf48d Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Tue, 9 Sep 2025 12:25:27 +0200
Subject: [PATCH 3/8] use __builtin_elementwise_canonicalize
---
libclc/clc/lib/generic/math/clc_subnormal_config.cl | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libclc/clc/lib/generic/math/clc_subnormal_config.cl b/libclc/clc/lib/generic/math/clc_subnormal_config.cl
index dd019789a99cc..55e28a35f6ce8 100644
--- a/libclc/clc/lib/generic/math/clc_subnormal_config.cl
+++ b/libclc/clc/lib/generic/math/clc_subnormal_config.cl
@@ -16,7 +16,7 @@ _CLC_DEF bool __clc_fp16_subnormals_supported() {
// SPIR-V doesn't support llvm.canonicalize for now.
return false;
#else
- return !__builtin_isfpclass(__builtin_canonicalizef((float)0x1p-24h),
+ return !__builtin_isfpclass(__builtin_elementwise_canonicalize(0x1p-24h),
__FPCLASS_POSZERO);
#endif
}
@@ -27,7 +27,7 @@ _CLC_DEF bool __clc_fp32_subnormals_supported() {
// SPIR-V doesn't support llvm.canonicalize for now.
return false;
#else
- return !__builtin_isfpclass(__builtin_canonicalizef(0x1p-149f),
+ return !__builtin_isfpclass(__builtin_elementwise_canonicalize(0x1p-149f),
__FPCLASS_POSZERO);
#endif
}
@@ -39,7 +39,7 @@ _CLC_DEF bool __clc_fp64_subnormals_supported() {
// SPIR-V doesn't support llvm.canonicalize for now.
return false;
#else
- return !__builtin_isfpclass(__builtin_canonicalize(0x1p-1074),
+ return !__builtin_isfpclass(__builtin_elementwise_canonicalize(0x1p-1074),
__FPCLASS_POSZERO);
#endif
}
>From 4608f77d4bc81ec584e5c3c2d19b446ffc919a2f Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Tue, 9 Sep 2025 12:40:49 +0200
Subject: [PATCH 4/8] set -fdenormal-fp-math-f32=dynamic build flag globally
---
libclc/CMakeLists.txt | 1 +
1 file changed, 1 insertion(+)
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 572556034e66c..2447d97d2b624 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -412,6 +412,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Error on undefined macros
-Werror=undef
-fdiscard-value-names
+ -Xclang -fdenormal-fp-math-f32=dynamic
)
if( NOT "${cpu}" STREQUAL "" )
>From 3f665ce5b145ca5ab38a95e6cb145064e8c9dbfe Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Thu, 11 Sep 2025 01:02:12 +0200
Subject: [PATCH 5/8] rename __clc_flush_denormal_if_not_supported to
__clc_soft_flush_denormal
---
libclc/clc/include/clc/math/math.h | 4 ++--
libclc/clc/lib/clspv/math/clc_sw_fma.cl | 6 +++---
libclc/clc/lib/generic/math/clc_remquo.inc | 4 ++--
libclc/clc/lib/generic/math/clc_sw_fma.cl | 6 +++---
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/libclc/clc/include/clc/math/math.h b/libclc/clc/include/clc/math/math.h
index 2db5d187c88ce..b43f6a0f4c993 100644
--- a/libclc/clc/include/clc/math/math.h
+++ b/libclc/clc/include/clc/math/math.h
@@ -64,10 +64,10 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
#define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32)
-_CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
+_CLC_OVERLOAD _CLC_INLINE float __clc_soft_flush_denormal(float x) {
// Avoid calling __clc_fp32_subnormals_supported here: it uses
// llvm.canonicalize, which quiets sNaN.
- return __builtin_fabsf(x) < 0x1p-149f
+ return __builtin_elementwise_abs(x) < 0x1p-149f
? __builtin_elementwise_copysign(0.0f, x)
: x;
}
diff --git a/libclc/clc/lib/clspv/math/clc_sw_fma.cl b/libclc/clc/lib/clspv/math/clc_sw_fma.cl
index c28b9441b05ff..e67456b7f7ebc 100644
--- a/libclc/clc/lib/clspv/math/clc_sw_fma.cl
+++ b/libclc/clc/lib/clspv/math/clc_sw_fma.cl
@@ -127,9 +127,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
return c;
}
- a = __clc_flush_denormal_if_not_supported(a);
- b = __clc_flush_denormal_if_not_supported(b);
- c = __clc_flush_denormal_if_not_supported(c);
+ a = __clc_soft_flush_denormal(a);
+ b = __clc_soft_flush_denormal(b);
+ c = __clc_soft_flush_denormal(c);
if (a == 0.0f || b == 0.0f) {
return c;
diff --git a/libclc/clc/lib/generic/math/clc_remquo.inc b/libclc/clc/lib/generic/math/clc_remquo.inc
index 3a76ffed7f039..681020f501d65 100644
--- a/libclc/clc/lib/generic/math/clc_remquo.inc
+++ b/libclc/clc/lib/generic/math/clc_remquo.inc
@@ -8,8 +8,8 @@
_CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y,
__CLC_ADDRESS_SPACE int *quo) {
- x = __clc_flush_denormal_if_not_supported(x);
- y = __clc_flush_denormal_if_not_supported(y);
+ x = __clc_soft_flush_denormal(x);
+ y = __clc_soft_flush_denormal(y);
int ux = __clc_as_int(x);
int ax = ux & EXSIGNBIT_SP32;
float xa = __clc_as_float(ax);
diff --git a/libclc/clc/lib/generic/math/clc_sw_fma.cl b/libclc/clc/lib/generic/math/clc_sw_fma.cl
index 606e4df320a89..e8bf673b50f36 100644
--- a/libclc/clc/lib/generic/math/clc_sw_fma.cl
+++ b/libclc/clc/lib/generic/math/clc_sw_fma.cl
@@ -36,9 +36,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
return c;
}
- a = __clc_flush_denormal_if_not_supported(a);
- b = __clc_flush_denormal_if_not_supported(b);
- c = __clc_flush_denormal_if_not_supported(c);
+ a = __clc_soft_flush_denormal(a);
+ b = __clc_soft_flush_denormal(b);
+ c = __clc_soft_flush_denormal(c);
if (c == 0) {
return a * b;
>From 7b290a24e3662f203160e8df33421eb3928dd475 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 3 Oct 2025 07:46:43 +0200
Subject: [PATCH 6/8] delete clc_sw_fma
---
.../include/clc/internal/math/clc_sw_fma.h | 19 --
libclc/clc/include/clc/math/math.h | 9 -
libclc/clc/lib/clspv/SOURCES | 1 -
libclc/clc/lib/clspv/math/clc_sw_fma.cl | 274 ------------------
libclc/clc/lib/generic/SOURCES | 1 -
libclc/clc/lib/generic/math/clc_fma.inc | 4 -
.../lib/generic/math/clc_sincos_helpers.inc | 32 +-
libclc/clc/lib/generic/math/clc_sw_fma.cl | 165 -----------
libclc/clc/lib/spirv/SOURCES | 1 -
.../spirv/math/clc_runtime_has_hw_fma32.cl | 9 -
libclc/opencl/lib/clspv/math/fma.cl | 2 +-
libclc/opencl/lib/spirv/math/fma.cl | 2 +-
12 files changed, 7 insertions(+), 512 deletions(-)
delete mode 100644 libclc/clc/include/clc/internal/math/clc_sw_fma.h
delete mode 100644 libclc/clc/lib/clspv/math/clc_sw_fma.cl
delete mode 100644 libclc/clc/lib/generic/math/clc_sw_fma.cl
delete mode 100644 libclc/clc/lib/spirv/math/clc_runtime_has_hw_fma32.cl
diff --git a/libclc/clc/include/clc/internal/math/clc_sw_fma.h b/libclc/clc/include/clc/internal/math/clc_sw_fma.h
deleted file mode 100644
index 5d6c76879ceb9..0000000000000
--- a/libclc/clc/include/clc/internal/math/clc_sw_fma.h
+++ /dev/null
@@ -1,19 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __CLC_INTERNAL_MATH_CLC_SW_FMA_H__
-#define __CLC_INTERNAL_MATH_CLC_SW_FMA_H__
-
-#define __CLC_FUNCTION __clc_sw_fma
-#define __CLC_BODY <clc/shared/ternary_decl.inc>
-
-#include <clc/math/gentype.inc>
-
-#undef __CLC_FUNCTION
-
-#endif // __CLC_INTERNAL_MATH_CLC_SW_FMA_H__
diff --git a/libclc/clc/include/clc/math/math.h b/libclc/clc/include/clc/math/math.h
index b43f6a0f4c993..cc4eb4ce3ec76 100644
--- a/libclc/clc/include/clc/math/math.h
+++ b/libclc/clc/include/clc/math/math.h
@@ -23,15 +23,6 @@
#define PNOR 0x100
#define PINF 0x200
-#if (defined __AMDGCN__ || defined __R600__) && !defined __HAS_FMAF__
-#define __CLC_HAVE_HW_FMA32() (0)
-#elif defined(CLC_SPIRV)
-bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
-#define __CLC_HAVE_HW_FMA32() __clc_runtime_has_hw_fma32()
-#else
-#define __CLC_HAVE_HW_FMA32() (1)
-#endif
-
#define HAVE_BITALIGN() (0)
#define HAVE_FAST_FMA32() (0)
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
index b91b0e70a397d..2faea79cbc0bf 100644
--- a/libclc/clc/lib/clspv/SOURCES
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -1,2 +1 @@
-math/clc_sw_fma.cl
integer/clc_mul_hi.cl
diff --git a/libclc/clc/lib/clspv/math/clc_sw_fma.cl b/libclc/clc/lib/clspv/math/clc_sw_fma.cl
deleted file mode 100644
index e67456b7f7ebc..0000000000000
--- a/libclc/clc/lib/clspv/math/clc_sw_fma.cl
+++ /dev/null
@@ -1,274 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// This version is derived from the generic fma software implementation
-// (__clc_sw_fma), but avoids the use of ulong in favor of uint2. The logic has
-// been updated as appropriate.
-
-#include <clc/clc_as_type.h>
-#include <clc/float/definitions.h>
-#include <clc/integer/clc_abs.h>
-#include <clc/integer/clc_clz.h>
-#include <clc/integer/clc_hadd.h>
-#include <clc/integer/clc_mul_hi.h>
-#include <clc/integer/definitions.h>
-#include <clc/math/clc_mad.h>
-#include <clc/math/math.h>
-#include <clc/relational/clc_isinf.h>
-#include <clc/relational/clc_isnan.h>
-#include <clc/shared/clc_max.h>
-
-struct fp {
- uint2 mantissa;
- int exponent;
- uint sign;
-};
-
-static uint2 u2_set(uint hi, uint lo) {
- uint2 res;
- res.lo = lo;
- res.hi = hi;
- return res;
-}
-
-static uint2 u2_set_u(uint val) { return u2_set(0, val); }
-
-static uint2 u2_mul(uint a, uint b) {
- uint2 res;
- res.hi = __clc_mul_hi(a, b);
- res.lo = a * b;
- return res;
-}
-
-static uint2 u2_sll(uint2 val, uint shift) {
- if (shift == 0)
- return val;
- if (shift < 32) {
- val.hi <<= shift;
- val.hi |= val.lo >> (32 - shift);
- val.lo <<= shift;
- } else {
- val.hi = val.lo << (shift - 32);
- val.lo = 0;
- }
- return val;
-}
-
-static uint2 u2_srl(uint2 val, uint shift) {
- if (shift == 0)
- return val;
- if (shift < 32) {
- val.lo >>= shift;
- val.lo |= val.hi << (32 - shift);
- val.hi >>= shift;
- } else {
- val.lo = val.hi >> (shift - 32);
- val.hi = 0;
- }
- return val;
-}
-
-static uint2 u2_or(uint2 a, uint b) {
- a.lo |= b;
- return a;
-}
-
-static uint2 u2_and(uint2 a, uint2 b) {
- a.lo &= b.lo;
- a.hi &= b.hi;
- return a;
-}
-
-static uint2 u2_add(uint2 a, uint2 b) {
- uint carry = (__clc_hadd(a.lo, b.lo) >> 31) & 0x1;
- a.lo += b.lo;
- a.hi += b.hi + carry;
- return a;
-}
-
-static uint2 u2_add_u(uint2 a, uint b) { return u2_add(a, u2_set_u(b)); }
-
-static uint2 u2_inv(uint2 a) {
- a.lo = ~a.lo;
- a.hi = ~a.hi;
- return u2_add_u(a, 1);
-}
-
-static uint u2_clz(uint2 a) {
- uint leading_zeroes = __clc_clz(a.hi);
- if (leading_zeroes == 32) {
- leading_zeroes += __clc_clz(a.lo);
- }
- return leading_zeroes;
-}
-
-static bool u2_eq(uint2 a, uint2 b) { return a.lo == b.lo && a.hi == b.hi; }
-
-static bool u2_zero(uint2 a) { return u2_eq(a, u2_set_u(0)); }
-
-static bool u2_gt(uint2 a, uint2 b) {
- return a.hi > b.hi || (a.hi == b.hi && a.lo > b.lo);
-}
-
-_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
- /* special cases */
- if (__clc_isnan(a) || __clc_isnan(b) || __clc_isnan(c) || __clc_isinf(a) ||
- __clc_isinf(b)) {
- return __clc_mad(a, b, c);
- }
-
- /* If only c is inf, and both a,b are regular numbers, the result is c*/
- if (__clc_isinf(c)) {
- return c;
- }
-
- a = __clc_soft_flush_denormal(a);
- b = __clc_soft_flush_denormal(b);
- c = __clc_soft_flush_denormal(c);
-
- if (a == 0.0f || b == 0.0f) {
- return c;
- }
-
- if (c == 0) {
- return a * b;
- }
-
- struct fp st_a, st_b, st_c;
-
- st_a.exponent = a == .0f ? 0 : ((__clc_as_uint(a) & 0x7f800000) >> 23) - 127;
- st_b.exponent = b == .0f ? 0 : ((__clc_as_uint(b) & 0x7f800000) >> 23) - 127;
- st_c.exponent = c == .0f ? 0 : ((__clc_as_uint(c) & 0x7f800000) >> 23) - 127;
-
- st_a.mantissa =
- u2_set_u(a == .0f ? 0 : (__clc_as_uint(a) & 0x7fffff) | 0x800000);
- st_b.mantissa =
- u2_set_u(b == .0f ? 0 : (__clc_as_uint(b) & 0x7fffff) | 0x800000);
- st_c.mantissa =
- u2_set_u(c == .0f ? 0 : (__clc_as_uint(c) & 0x7fffff) | 0x800000);
-
- st_a.sign = __clc_as_uint(a) & 0x80000000;
- st_b.sign = __clc_as_uint(b) & 0x80000000;
- st_c.sign = __clc_as_uint(c) & 0x80000000;
-
- // Multiplication.
- // Move the product to the highest bits to maximize precision
- // mantissa is 24 bits => product is 48 bits, 2bits non-fraction.
- // Add one bit for future addition overflow,
- // add another bit to detect subtraction underflow
- struct fp st_mul;
- st_mul.sign = st_a.sign ^ st_b.sign;
- st_mul.mantissa = u2_sll(u2_mul(st_a.mantissa.lo, st_b.mantissa.lo), 14);
- st_mul.exponent =
- !u2_zero(st_mul.mantissa) ? st_a.exponent + st_b.exponent : 0;
-
- // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel
- if (st_mul.exponent == 0 && u2_zero(st_mul.mantissa))
- return c;
-
-// Mantissa is 23 fractional bits, shift it the same way as product mantissa
-#define C_ADJUST 37ul
-
- // both exponents are bias adjusted
- int exp_diff = st_mul.exponent - st_c.exponent;
-
- st_c.mantissa = u2_sll(st_c.mantissa, C_ADJUST);
- uint2 cutoff_bits = u2_set_u(0);
- uint2 cutoff_mask = u2_add(u2_sll(u2_set_u(1), __clc_abs(exp_diff)),
- u2_set(0xffffffff, 0xffffffff));
- if (exp_diff > 0) {
- cutoff_bits =
- exp_diff >= 64 ? st_c.mantissa : u2_and(st_c.mantissa, cutoff_mask);
- st_c.mantissa =
- exp_diff >= 64 ? u2_set_u(0) : u2_srl(st_c.mantissa, exp_diff);
- } else {
- cutoff_bits = -exp_diff >= 64 ? st_mul.mantissa
- : u2_and(st_mul.mantissa, cutoff_mask);
- st_mul.mantissa =
- -exp_diff >= 64 ? u2_set_u(0) : u2_srl(st_mul.mantissa, -exp_diff);
- }
-
- struct fp st_fma;
- st_fma.sign = st_mul.sign;
- st_fma.exponent = __clc_max(st_mul.exponent, st_c.exponent);
- if (st_c.sign == st_mul.sign) {
- st_fma.mantissa = u2_add(st_mul.mantissa, st_c.mantissa);
- } else {
- // cutoff bits borrow one
- st_fma.mantissa =
- u2_add(u2_add(st_mul.mantissa, u2_inv(st_c.mantissa)),
- (!u2_zero(cutoff_bits) && (st_mul.exponent > st_c.exponent)
- ? u2_set(0xffffffff, 0xffffffff)
- : u2_set_u(0)));
- }
-
- // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
- if (u2_gt(st_fma.mantissa, u2_set(0x7fffffff, 0xffffffff))) {
- st_fma.mantissa = u2_inv(st_fma.mantissa);
- st_fma.sign = st_mul.sign ^ 0x80000000;
- }
-
- // detect overflow/underflow
- int overflow_bits = 3 - u2_clz(st_fma.mantissa);
-
- // adjust exponent
- st_fma.exponent += overflow_bits;
-
- // handle underflow
- if (overflow_bits < 0) {
- st_fma.mantissa = u2_sll(st_fma.mantissa, -overflow_bits);
- overflow_bits = 0;
- }
-
- // rounding
- uint2 trunc_mask = u2_add(u2_sll(u2_set_u(1), C_ADJUST + overflow_bits),
- u2_set(0xffffffff, 0xffffffff));
- uint2 trunc_bits =
- u2_or(u2_and(st_fma.mantissa, trunc_mask), !u2_zero(cutoff_bits));
- uint2 last_bit =
- u2_and(st_fma.mantissa, u2_sll(u2_set_u(1), C_ADJUST + overflow_bits));
- uint2 grs_bits = u2_sll(u2_set_u(4), C_ADJUST - 3 + overflow_bits);
-
- // round to nearest even
- if (u2_gt(trunc_bits, grs_bits) ||
- (u2_eq(trunc_bits, grs_bits) && !u2_zero(last_bit))) {
- st_fma.mantissa =
- u2_add(st_fma.mantissa, u2_sll(u2_set_u(1), C_ADJUST + overflow_bits));
- }
-
- // Shift mantissa back to bit 23
- st_fma.mantissa = u2_srl(st_fma.mantissa, C_ADJUST + overflow_bits);
-
- // Detect rounding overflow
- if (u2_gt(st_fma.mantissa, u2_set_u(0xffffff))) {
- ++st_fma.exponent;
- st_fma.mantissa = u2_srl(st_fma.mantissa, 1);
- }
-
- if (u2_zero(st_fma.mantissa)) {
- return 0.0f;
- }
-
- // Flating point range limit
- if (st_fma.exponent > 127) {
- return __clc_as_float(__clc_as_uint(INFINITY) | st_fma.sign);
- }
-
- // Flush denormals
- if (st_fma.exponent <= -127) {
- return __clc_as_float(st_fma.sign);
- }
-
- return __clc_as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) |
- ((uint)st_fma.mantissa.lo & 0x7fffff));
-}
-
-#define __CLC_FLOAT_ONLY
-#define __CLC_FUNCTION __clc_sw_fma
-#define __CLC_BODY <clc/shared/ternary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 4a6dadc702033..ef35c43ce443e 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -138,7 +138,6 @@ math/clc_sinh.cl
math/clc_sinpi.cl
math/clc_sqrt.cl
math/clc_subnormal_config.cl
-math/clc_sw_fma.cl
math/clc_tables.cl
math/clc_tan.cl
math/clc_tanh.cl
diff --git a/libclc/clc/lib/generic/math/clc_fma.inc b/libclc/clc/lib/generic/math/clc_fma.inc
index b23b6433d2922..a55e9c0f9b2b7 100644
--- a/libclc/clc/lib/generic/math/clc_fma.inc
+++ b/libclc/clc/lib/generic/math/clc_fma.inc
@@ -8,9 +8,5 @@
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_fma(__CLC_GENTYPE a, __CLC_GENTYPE b,
__CLC_GENTYPE c) {
-#if __CLC_FPSIZE == 32
- if (!__CLC_HAVE_HW_FMA32())
- return __clc_sw_fma(a, b, c);
-#endif
return __builtin_elementwise_fma(a, b, c);
}
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index bddc0998cf950..e902bf3830626 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -97,19 +97,9 @@ _CLC_DEF _CLC_OVERLOAD void __clc_fullMulS(private __CLC_FLOATN *hi,
private __CLC_FLOATN *lo,
__CLC_FLOATN a, __CLC_FLOATN b,
__CLC_FLOATN bh, __CLC_FLOATN bt) {
- if (__CLC_HAVE_HW_FMA32()) {
- __CLC_FLOATN ph = a * b;
- *hi = ph;
- *lo = __clc_fma(a, b, -ph);
- } else {
- __CLC_FLOATN ah = __CLC_AS_FLOATN(__CLC_AS_UINTN(a) & 0xfffff000U);
- __CLC_FLOATN at = a - ah;
- __CLC_FLOATN ph = a * b;
- __CLC_FLOATN pt = __clc_mad(
- at, bt, __clc_mad(at, bh, __clc_mad(ah, bt, __clc_mad(ah, bh, -ph))));
- *hi = ph;
- *lo = pt;
- }
+ __CLC_FLOATN ph = a * b;
+ *hi = ph;
+ *lo = __clc_fma(a, b, -ph);
}
_CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_removePi2S(private __CLC_FLOATN *hi,
@@ -280,20 +270,8 @@ _CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionLargeS(
const __CLC_FLOATN pio2t = (__CLC_FLOATN)0xa22168 / 0x1.0p+47f;
__CLC_FLOATN rh, rt;
-
- if (__CLC_HAVE_HW_FMA32()) {
- rh = q1 * pio2h;
- rt = __clc_fma(q0, pio2h, __clc_fma(q1, pio2t, __clc_fma(q1, pio2h, -rh)));
- } else {
- __CLC_FLOATN q1h = __CLC_AS_FLOATN(__CLC_AS_UINTN(q1) & 0xfffff000);
- __CLC_FLOATN q1t = q1 - q1h;
- rh = q1 * pio2h;
- rt = __clc_mad(
- q1t, pio2ht,
- __clc_mad(q1t, pio2hh,
- __clc_mad(q1h, pio2ht, __clc_mad(q1h, pio2hh, -rh))));
- rt = __clc_mad(q0, pio2h, __clc_mad(q1, pio2t, rt));
- }
+ rh = q1 * pio2h;
+ rt = __clc_fma(q0, pio2h, __clc_fma(q1, pio2t, __clc_fma(q1, pio2h, -rh)));
__CLC_FLOATN t = rh + rt;
rt = rt - (t - rh);
diff --git a/libclc/clc/lib/generic/math/clc_sw_fma.cl b/libclc/clc/lib/generic/math/clc_sw_fma.cl
deleted file mode 100644
index e8bf673b50f36..0000000000000
--- a/libclc/clc/lib/generic/math/clc_sw_fma.cl
+++ /dev/null
@@ -1,165 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clc_as_type.h>
-#include <clc/float/definitions.h>
-#include <clc/integer/clc_abs.h>
-#include <clc/integer/clc_clz.h>
-#include <clc/integer/definitions.h>
-#include <clc/internal/clc.h>
-#include <clc/math/clc_mad.h>
-#include <clc/math/math.h>
-#include <clc/relational/clc_isinf.h>
-#include <clc/relational/clc_isnan.h>
-#include <clc/shared/clc_max.h>
-
-struct fp {
- ulong mantissa;
- int exponent;
- uint sign;
-};
-
-_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
- /* special cases */
- if (__clc_isnan(a) || __clc_isnan(b) || __clc_isnan(c) || __clc_isinf(a) ||
- __clc_isinf(b)) {
- return __clc_mad(a, b, c);
- }
-
- /* If only c is inf, and both a,b are regular numbers, the result is c*/
- if (__clc_isinf(c)) {
- return c;
- }
-
- a = __clc_soft_flush_denormal(a);
- b = __clc_soft_flush_denormal(b);
- c = __clc_soft_flush_denormal(c);
-
- if (c == 0) {
- return a * b;
- }
-
- struct fp st_a, st_b, st_c;
-
- st_a.exponent = a == .0f ? 0 : ((__clc_as_uint(a) & 0x7f800000) >> 23) - 127;
- st_b.exponent = b == .0f ? 0 : ((__clc_as_uint(b) & 0x7f800000) >> 23) - 127;
- st_c.exponent = c == .0f ? 0 : ((__clc_as_uint(c) & 0x7f800000) >> 23) - 127;
-
- st_a.mantissa = a == .0f ? 0 : (__clc_as_uint(a) & 0x7fffff) | 0x800000;
- st_b.mantissa = b == .0f ? 0 : (__clc_as_uint(b) & 0x7fffff) | 0x800000;
- st_c.mantissa = c == .0f ? 0 : (__clc_as_uint(c) & 0x7fffff) | 0x800000;
-
- st_a.sign = __clc_as_uint(a) & 0x80000000;
- st_b.sign = __clc_as_uint(b) & 0x80000000;
- st_c.sign = __clc_as_uint(c) & 0x80000000;
-
- // Multiplication.
- // Move the product to the highest bits to maximize precision
- // mantissa is 24 bits => product is 48 bits, 2bits non-fraction.
- // Add one bit for future addition overflow,
- // add another bit to detect subtraction underflow
- struct fp st_mul;
- st_mul.sign = st_a.sign ^ st_b.sign;
- st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul;
- st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0;
-
- // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel
- if (st_mul.exponent == 0 && st_mul.mantissa == 0)
- return c;
-
-// Mantissa is 23 fractional bits, shift it the same way as product mantissa
-#define C_ADJUST 37ul
-
- // both exponents are bias adjusted
- int exp_diff = st_mul.exponent - st_c.exponent;
-
- st_c.mantissa <<= C_ADJUST;
- ulong cutoff_bits = 0;
- ulong cutoff_mask = (1ul << __clc_abs(exp_diff)) - 1ul;
- if (exp_diff > 0) {
- cutoff_bits =
- exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask);
- st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff);
- } else {
- cutoff_bits =
- -exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask);
- st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff);
- }
-
- struct fp st_fma;
- st_fma.sign = st_mul.sign;
- st_fma.exponent = __clc_max(st_mul.exponent, st_c.exponent);
- if (st_c.sign == st_mul.sign) {
- st_fma.mantissa = st_mul.mantissa + st_c.mantissa;
- } else {
- // cutoff bits borrow one
- st_fma.mantissa =
- st_mul.mantissa - st_c.mantissa -
- (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0);
- }
-
- // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
- if (st_fma.mantissa > LONG_MAX) {
- st_fma.mantissa = 0 - st_fma.mantissa;
- st_fma.sign = st_mul.sign ^ 0x80000000;
- }
-
- // detect overflow/underflow
- int overflow_bits = 3 - __clc_clz(st_fma.mantissa);
-
- // adjust exponent
- st_fma.exponent += overflow_bits;
-
- // handle underflow
- if (overflow_bits < 0) {
- st_fma.mantissa <<= -overflow_bits;
- overflow_bits = 0;
- }
-
- // rounding
- ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1;
- ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0);
- ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits));
- ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits));
-
- // round to nearest even
- if ((trunc_bits > grs_bits) || (trunc_bits == grs_bits && last_bit != 0)) {
- st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits));
- }
-
- // Shift mantissa back to bit 23
- st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits));
-
- // Detect rounding overflow
- if (st_fma.mantissa > 0xffffff) {
- ++st_fma.exponent;
- st_fma.mantissa >>= 1;
- }
-
- if (st_fma.mantissa == 0) {
- return .0f;
- }
-
- // Flating point range limit
- if (st_fma.exponent > 127) {
- return __clc_as_float(__clc_as_uint(INFINITY) | st_fma.sign);
- }
-
- // Flush denormals
- if (st_fma.exponent <= -127) {
- return __clc_as_float(st_fma.sign);
- }
-
- return __clc_as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) |
- ((uint)st_fma.mantissa & 0x7fffff));
-}
-
-#define __CLC_FLOAT_ONLY
-#define __CLC_FUNCTION __clc_sw_fma
-#define __CLC_BODY <clc/shared/ternary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
index 07bc7aaead8e8..ed63fe6b7c529 100644
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -1,3 +1,2 @@
math/clc_fmax.cl
math/clc_fmin.cl
-math/clc_runtime_has_hw_fma32.cl
diff --git a/libclc/clc/lib/spirv/math/clc_runtime_has_hw_fma32.cl b/libclc/clc/lib/spirv/math/clc_runtime_has_hw_fma32.cl
deleted file mode 100644
index 2f6ad2c5175dd..0000000000000
--- a/libclc/clc/lib/spirv/math/clc_runtime_has_hw_fma32.cl
+++ /dev/null
@@ -1,9 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-bool __clc_runtime_has_hw_fma32() { return false; }
diff --git a/libclc/opencl/lib/clspv/math/fma.cl b/libclc/opencl/lib/clspv/math/fma.cl
index 0e328903ba263..1ea6f034b0d1f 100644
--- a/libclc/opencl/lib/clspv/math/fma.cl
+++ b/libclc/opencl/lib/clspv/math/fma.cl
@@ -11,7 +11,7 @@
#define __CLC_FLOAT_ONLY
#define __CLC_FUNCTION fma
-#define __CLC_IMPL_FUNCTION(x) __clc_sw_fma
+#define __CLC_IMPL_FUNCTION(x) __clc_fma
#define __CLC_BODY <clc/shared/ternary_def.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/opencl/lib/spirv/math/fma.cl b/libclc/opencl/lib/spirv/math/fma.cl
index 0e328903ba263..1ea6f034b0d1f 100644
--- a/libclc/opencl/lib/spirv/math/fma.cl
+++ b/libclc/opencl/lib/spirv/math/fma.cl
@@ -11,7 +11,7 @@
#define __CLC_FLOAT_ONLY
#define __CLC_FUNCTION fma
-#define __CLC_IMPL_FUNCTION(x) __clc_sw_fma
+#define __CLC_IMPL_FUNCTION(x) __clc_fma
#define __CLC_BODY <clc/shared/ternary_def.inc>
#include <clc/math/gentype.inc>
>From 3da9705621c6e3bbce01baba3b2a7e1c3aac6a0c Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 3 Oct 2025 07:50:42 +0200
Subject: [PATCH 7/8] -fdenormal-fp-math-f32 -> -fdenormal-fp-math
---
libclc/CMakeLists.txt | 2 +-
libclc/clc/lib/generic/math/clc_fma.cl | 2 +-
libclc/opencl/lib/clspv/math/fma.cl | 2 +-
libclc/opencl/lib/spirv/math/fma.cl | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 2447d97d2b624..bc6dc416193fb 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -412,7 +412,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Error on undefined macros
-Werror=undef
-fdiscard-value-names
- -Xclang -fdenormal-fp-math-f32=dynamic
+ -Xclang -fdenormal-fp-math=dynamic
)
if( NOT "${cpu}" STREQUAL "" )
diff --git a/libclc/clc/lib/generic/math/clc_fma.cl b/libclc/clc/lib/generic/math/clc_fma.cl
index e69ef614e780f..27ea962af398d 100644
--- a/libclc/clc/lib/generic/math/clc_fma.cl
+++ b/libclc/clc/lib/generic/math/clc_fma.cl
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include <clc/internal/clc.h>
-#include <clc/internal/math/clc_sw_fma.h>
+#include <clc/math/clc_fma.h>
#include <clc/math/math.h>
#define __CLC_BODY <clc_fma.inc>
diff --git a/libclc/opencl/lib/clspv/math/fma.cl b/libclc/opencl/lib/clspv/math/fma.cl
index 1ea6f034b0d1f..5b5b13d81cf68 100644
--- a/libclc/opencl/lib/clspv/math/fma.cl
+++ b/libclc/opencl/lib/clspv/math/fma.cl
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/internal/math/clc_sw_fma.h>
+#include <clc/math/clc_fma.h>
#include <clc/opencl/math/fma.h>
#define __CLC_FLOAT_ONLY
diff --git a/libclc/opencl/lib/spirv/math/fma.cl b/libclc/opencl/lib/spirv/math/fma.cl
index 1ea6f034b0d1f..5b5b13d81cf68 100644
--- a/libclc/opencl/lib/spirv/math/fma.cl
+++ b/libclc/opencl/lib/spirv/math/fma.cl
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/internal/math/clc_sw_fma.h>
+#include <clc/math/clc_fma.h>
#include <clc/opencl/math/fma.h>
#define __CLC_FLOAT_ONLY
>From 7d21a1a82f05735c1bcfdf23ef8ac0171be85bfc Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 3 Oct 2025 09:03:43 +0200
Subject: [PATCH 8/8] remove -Xclang before -fdenormal-fp-math=dynamic
---
libclc/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index bc6dc416193fb..97896715e2712 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -412,7 +412,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Error on undefined macros
-Werror=undef
-fdiscard-value-names
- -Xclang -fdenormal-fp-math=dynamic
+ -fdenormal-fp-math=dynamic
)
if( NOT "${cpu}" STREQUAL "" )
More information about the cfe-commits
mailing list