[libclc] Fix exp and exp2 float implementation subnormal handling (PR #179875)
Wenju He via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 5 22:09:48 PST 2026
https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/179875
>From 85b513d282e3006973af015c0682cedc24ae1515 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Thu, 5 Feb 2026 07:31:07 +0100
Subject: [PATCH 1/3] [libclc] Fix exp and exp2 float implementation subnormal
handling
The float implementations of exp and exp2 fail the OpenCL CTS on Intel GPUs:
ERROR: exp: -8377325.500000 ulp error at -0x1.5d5ap+6 (0xc2aead00): *0x1.ff4fb8p-127 vs. 0x0p+0
ERROR: exp2: -8377259.000000 ulp error at -0x1.f802p+6 (0xc2fc0100): *0x1.ff4eacp-127 vs. 0x0p+0
Root cause:
* The `Scale by 2^p` step adds p directly to the exponent bits of y, which
  doesn't work if p is smaller than -126 (subnormal range).
* The llim check flushes subnormal results to 0.
Fix by splitting the exponent into two scale factors, which preserves
subnormal results, and by lowering llim to the logarithm of the smallest
subnormal value.
---
libclc/clc/lib/generic/math/clc_exp.inc | 13 +++++++++----
libclc/clc/lib/generic/math/clc_exp2.inc | 10 +++++++---
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/libclc/clc/lib/generic/math/clc_exp.inc b/libclc/clc/lib/generic/math/clc_exp.inc
index 5057bf8034e92..c6131c55690ae 100644
--- a/libclc/clc/lib/generic/math/clc_exp.inc
+++ b/libclc/clc/lib/generic/math/clc_exp.inc
@@ -37,16 +37,21 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
__CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
- // Scale by 2^p
- __CLC_GENTYPE r = __CLC_AS_GENTYPE(__CLC_AS_INTN(y) + (p << 23));
+ // Split the exponent to preserves subnormal results when p is very negative.
+ __CLC_INTN p1 = p / 2;
+ __CLC_INTN p2 = p - p1;
+ __CLC_GENTYPE s1 = __CLC_AS_GENTYPE((p1 + 127) << 23);
+ __CLC_GENTYPE s2 = __CLC_AS_GENTYPE((p2 + 127) << 23);
+ __CLC_GENTYPE r = y * s1 * s2;
// ln(largest_normal) = 88.72283905206835305366
const __CLC_GENTYPE ulim = 0x1.62e430p+6f;
- // ln(smallest_normal) = -87.33654475055310898657
- const __CLC_GENTYPE llim = -0x1.5d589ep+6f;
+ // ln(smallest_subnormal)
+ const __CLC_GENTYPE llim = -0x1.9d1da0p+6f;
r = x < llim ? 0.0f : r;
r = x < ulim ? r : __CLC_AS_GENTYPE((__CLC_UINTN)0x7f800000);
+
return __clc_isnan(x) ? x : r;
}
diff --git a/libclc/clc/lib/generic/math/clc_exp2.inc b/libclc/clc/lib/generic/math/clc_exp2.inc
index 6da361a43ed4c..8526dcb81feda 100644
--- a/libclc/clc/lib/generic/math/clc_exp2.inc
+++ b/libclc/clc/lib/generic/math/clc_exp2.inc
@@ -36,11 +36,15 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
__CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
- // Scale by 2^p
- __CLC_GENTYPE r = __CLC_AS_FLOATN(__CLC_AS_INTN(y) + (p << 23));
+ // Split the exponent to preserves subnormal results when p is very negative.
+ __CLC_INTN p1 = p / 2;
+ __CLC_INTN p2 = p - p1;
+ __CLC_GENTYPE s1 = __CLC_AS_GENTYPE((p1 + 127) << 23);
+ __CLC_GENTYPE s2 = __CLC_AS_GENTYPE((p2 + 127) << 23);
+ __CLC_GENTYPE r = y * s1 * s2;
const __CLC_GENTYPE ulim = 128.0f;
- const __CLC_GENTYPE llim = -126.0f;
+ const __CLC_GENTYPE llim = -149.0f;
r = x < llim ? 0.0f : r;
r = x < ulim ? r : __CLC_AS_FLOATN((__CLC_UINTN)0x7f800000);
>From 61adcb24dfbee4822250da963948a67bdb03c770 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Thu, 5 Feb 2026 09:31:42 +0100
Subject: [PATCH 2/3] use right shift
---
libclc/clc/lib/generic/math/clc_exp.inc | 2 +-
libclc/clc/lib/generic/math/clc_exp2.inc | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/libclc/clc/lib/generic/math/clc_exp.inc b/libclc/clc/lib/generic/math/clc_exp.inc
index c6131c55690ae..ef836ce35a7a3 100644
--- a/libclc/clc/lib/generic/math/clc_exp.inc
+++ b/libclc/clc/lib/generic/math/clc_exp.inc
@@ -38,7 +38,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
__CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
// Split the exponent to preserves subnormal results when p is very negative.
- __CLC_INTN p1 = p / 2;
+ __CLC_INTN p1 = p >> 1;
__CLC_INTN p2 = p - p1;
__CLC_GENTYPE s1 = __CLC_AS_GENTYPE((p1 + 127) << 23);
__CLC_GENTYPE s2 = __CLC_AS_GENTYPE((p2 + 127) << 23);
diff --git a/libclc/clc/lib/generic/math/clc_exp2.inc b/libclc/clc/lib/generic/math/clc_exp2.inc
index 8526dcb81feda..6520c69e94601 100644
--- a/libclc/clc/lib/generic/math/clc_exp2.inc
+++ b/libclc/clc/lib/generic/math/clc_exp2.inc
@@ -37,7 +37,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
__CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
// Split the exponent to preserves subnormal results when p is very negative.
- __CLC_INTN p1 = p / 2;
+ __CLC_INTN p1 = p >> 1;
__CLC_INTN p2 = p - p1;
__CLC_GENTYPE s1 = __CLC_AS_GENTYPE((p1 + 127) << 23);
__CLC_GENTYPE s2 = __CLC_AS_GENTYPE((p2 + 127) << 23);
>From 4298d516f5b0d334953baa27603231b677695e82 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 6 Feb 2026 07:09:15 +0100
Subject: [PATCH 3/3] add back original comment
---
libclc/clc/lib/generic/math/clc_exp.inc | 1 +
libclc/clc/lib/generic/math/clc_exp2.inc | 1 +
2 files changed, 2 insertions(+)
diff --git a/libclc/clc/lib/generic/math/clc_exp.inc b/libclc/clc/lib/generic/math/clc_exp.inc
index ef836ce35a7a3..786b4a4f5cd52 100644
--- a/libclc/clc/lib/generic/math/clc_exp.inc
+++ b/libclc/clc/lib/generic/math/clc_exp.inc
@@ -37,6 +37,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
__CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
+ // Scale by 2^p
// Split the exponent to preserves subnormal results when p is very negative.
__CLC_INTN p1 = p >> 1;
__CLC_INTN p2 = p - p1;
diff --git a/libclc/clc/lib/generic/math/clc_exp2.inc b/libclc/clc/lib/generic/math/clc_exp2.inc
index 6520c69e94601..ac5a86f87d754 100644
--- a/libclc/clc/lib/generic/math/clc_exp2.inc
+++ b/libclc/clc/lib/generic/math/clc_exp2.inc
@@ -36,6 +36,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
__CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
+ // Scale by 2^p
// Split the exponent to preserves subnormal results when p is very negative.
__CLC_INTN p1 = p >> 1;
__CLC_INTN p2 = p - p1;
More information about the cfe-commits
mailing list