[libclc] 586cacd - [libclc] Optimize generic CLC fmin/fmax (#128506)

Tue Jul 29 05:21:45 PDT 2025

Author: Fraser Cormack
Date: 2025-07-29T13:21:42+01:00
New Revision: 586cacdbdd995d2a2f010f7152843745f4978b4b

URL: https://github.com/llvm/llvm-project/commit/586cacdbdd995d2a2f010f7152843745f4978b4b
DIFF: https://github.com/llvm/llvm-project/commit/586cacdbdd995d2a2f010f7152843745f4978b4b.diff

LOG: [libclc] Optimize generic CLC fmin/fmax (#128506)

With this commit, the CLC fmin/fmax builtins use clang's
__builtin_elementwise_(min|max)imumnum which helps us generate LLVM
minimumnum/maximumnum intrinsics directly. These intrinsics uniformly
select the non-NaN input over the (quiet or signalling) NaN input, which
corresponds to the behaviour that the OpenCL CTS tests for.

These intrinsics maintain the vector types, as opposed to scalarizing,
which was previously happening. This commit therefore helps to optimize
codegen for those targets.

Note that there is ongoing discussion [1] regarding how these builtins
should handle signalling NaNs in the OpenCL specification and whether
they should be able to return a quiet NaN as per the IEEE behaviour. If
the specification and/or CTS is ever updated to allow or mandate
returning a qNAN, these builtins could/should be updated to use
__builtin_elementwise_(min|max)num instead which would lower to LLVM
minnum/maxnum intrinsics.

The SPIR-V targets maintain the old implementations, as the LLVM ->
SPIR-V translator can't currently handle the LLVM intrinsics. The
implementation has been simplified to consistently use clang builtins,
as opposed to before where the half version was explicitly defined.

[1] https://github.com/KhronosGroup/OpenCL-CTS/pull/2285

Added: 
    libclc/clc/lib/spirv/math/clc_fmax.cl
    libclc/clc/lib/spirv/math/clc_fmin.cl

Modified: 
    libclc/clc/lib/amdgcn/SOURCES
    libclc/clc/lib/generic/math/clc_fmax.cl
    libclc/clc/lib/generic/math/clc_fmin.cl
    libclc/clc/lib/r600/SOURCES
    libclc/clc/lib/spirv/SOURCES

Removed: 
    libclc/clc/lib/amdgcn/math/clc_fmax.cl
    libclc/clc/lib/amdgcn/math/clc_fmin.cl
    libclc/clc/lib/r600/math/clc_fmax.cl
    libclc/clc/lib/r600/math/clc_fmin.cl


################################################################################
diff  --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index 7bec1740f7636..d91f08533e149 100644

--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,5 +1,3 @@
-math/clc_fmax.cl
-math/clc_fmin.cl
 math/clc_ldexp_override.cl
 workitem/clc_get_global_offset.cl
 workitem/clc_get_global_size.cl

diff  --git a/libclc/clc/lib/generic/math/clc_fmax.cl b/libclc/clc/lib/generic/math/clc_fmax.cl
index 5ebbf1b28df9c..b334207365b98 100644
--- a/libclc/clc/lib/generic/math/clc_fmax.cl
+++ b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -6,53 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-#define __FLOAT_ONLY
-#define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmax
-#define __IMPL_FUNCTION __builtin_fmaxf
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define __DOUBLE_ONLY
-#define __CLC_MIN_VECSIZE 1
-#define FUNCTION __clc_fmax
-#define __IMPL_FUNCTION __builtin_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
+#define __IMPL_FUNCTION(x) __builtin_elementwise_maximumnum
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (x < y) ? y : x;
-}
-
-#define __HALF_ONLY
-#define __CLC_SUPPORTED_VECSIZE_OR_1 2
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif

diff  --git a/libclc/clc/lib/generic/math/clc_fmin.cl b/libclc/clc/lib/generic/math/clc_fmin.cl
index 5bddbb8634126..d21bb8d076790 100644
--- a/libclc/clc/lib/generic/math/clc_fmin.cl
+++ b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -6,52 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-#define __FLOAT_ONLY
-#define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmin
-#define __IMPL_FUNCTION __builtin_fminf
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define __DOUBLE_ONLY
-#define __CLC_MIN_VECSIZE 1
-#define FUNCTION __clc_fmin
-#define __IMPL_FUNCTION __builtin_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
+#define __IMPL_FUNCTION(x) __builtin_elementwise_minimumnum
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
-}
-
-#define __HALF_ONLY
-#define __CLC_SUPPORTED_VECSIZE_OR_1 2
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
-
-#endif

diff  --git a/libclc/clc/lib/r600/SOURCES b/libclc/clc/lib/r600/SOURCES
index 75d32f4f535bc..8d5caf167aa4e 100644
--- a/libclc/clc/lib/r600/SOURCES
+++ b/libclc/clc/lib/r600/SOURCES
@@ -1,4 +1,2 @@
-math/clc_fmax.cl
-math/clc_fmin.cl
 math/clc_native_rsqrt.cl
 math/clc_rsqrt_override.cl

diff  --git a/libclc/clc/lib/r600/math/clc_fmax.cl b/libclc/clc/lib/r600/math/clc_fmax.cl
deleted file mode 100644
index 689e51a9829aa..0000000000000
--- a/libclc/clc/lib/r600/math/clc_fmax.cl
+++ /dev/null
@@ -1,41 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcmacro.h>
-#include <clc/internal/clc.h>
-#include <clc/math/math.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
-  // Flush denormals if not enabled. Otherwise fmax instruction flushes the
-  // values for comparison, but outputs original denormal
-  x = __clc_flush_denormal_if_not_supported(x);
-  y = __clc_flush_denormal_if_not_supported(y);
-  return __builtin_fmaxf(x, y);
-}
-
-#define __FLOAT_ONLY
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
-  return __builtin_fmax(x, y);
-}
-
-#define __DOUBLE_ONLY
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif

diff  --git a/libclc/clc/lib/r600/math/clc_fmin.cl b/libclc/clc/lib/r600/math/clc_fmin.cl
deleted file mode 100644
index 22cb7046a4ce3..0000000000000
--- a/libclc/clc/lib/r600/math/clc_fmin.cl
+++ /dev/null
@@ -1,42 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcmacro.h>
-#include <clc/internal/clc.h>
-#include <clc/math/math.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise
-  // fmin instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __clc_flush_denormal_if_not_supported(x);
-  y = __clc_flush_denormal_if_not_supported(y);
-  return __builtin_fminf(x, y);
-}
-
-#define __FLOAT_ONLY
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
-  return __builtin_fmin(x, y);
-}
-
-#define __DOUBLE_ONLY
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif

diff  --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
index cd6e0b2ea7088..07bc7aaead8e8 100644
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -1 +1,3 @@
+math/clc_fmax.cl
+math/clc_fmin.cl
 math/clc_runtime_has_hw_fma32.cl

diff  --git a/libclc/clc/lib/amdgcn/math/clc_fmax.cl b/libclc/clc/lib/spirv/math/clc_fmax.cl
similarity index 67%
rename from libclc/clc/lib/amdgcn/math/clc_fmax.cl
rename to libclc/clc/lib/spirv/math/clc_fmax.cl
index cea90a7135d5a..be660fe2b29ba 100644
--- a/libclc/clc/lib/amdgcn/math/clc_fmax.cl
+++ b/libclc/clc/lib/spirv/math/clc_fmax.cl
@@ -8,40 +8,23 @@
 
 #include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
 _CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise
-  // fmax instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __builtin_canonicalizef(x);
-  y = __builtin_canonicalizef(y);
   return __builtin_fmaxf(x, y);
 }
 
 #ifdef cl_khr_fp64
-
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
 _CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
-  x = __builtin_canonicalize(x);
-  y = __builtin_canonicalize(y);
   return __builtin_fmax(x, y);
 }
-
 #endif
-#ifdef cl_khr_fp16
 
+#ifdef cl_khr_fp16
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
 _CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? x : y;
+  return __builtin_fmaxf16(x, y);
 }
-
 #endif
 
 #define FUNCTION __clc_fmax

diff  --git a/libclc/clc/lib/amdgcn/math/clc_fmin.cl b/libclc/clc/lib/spirv/math/clc_fmin.cl
similarity index 67%
rename from libclc/clc/lib/amdgcn/math/clc_fmin.cl
rename to libclc/clc/lib/spirv/math/clc_fmin.cl
index 12bb0c64429fd..9f3fa66c0ab7a 100644
--- a/libclc/clc/lib/amdgcn/math/clc_fmin.cl
+++ b/libclc/clc/lib/spirv/math/clc_fmin.cl
@@ -8,41 +8,23 @@
 
 #include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
 _CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise
-  // fmin instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __builtin_canonicalizef(x);
-  y = __builtin_canonicalizef(y);
   return __builtin_fminf(x, y);
 }
 
 #ifdef cl_khr_fp64
-
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
 _CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
-  x = __builtin_canonicalize(x);
-  y = __builtin_canonicalize(y);
   return __builtin_fmin(x, y);
 }
-
 #endif
 
 #ifdef cl_khr_fp16
-
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
 _CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
+  return __builtin_fminf16(x, y);
 }
-
 #endif
 
 #define FUNCTION __clc_fmin