[libclc] Move fmin/fmax to the CLC library (PR #128506)

Mon Feb 24 04:44:31 PST 2025

https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/128506

Note that the CLC versions of these builtins don't offer the vector/scalar forms, for simplicity. The OpenCL layer converts the vector/scalar forms to vector/vector calls.

The CLC builtins use clang's __builtin_elementwise_(min|max), which helps us generate the llvm.(min|max)num intrinsics directly. These intrinsics select the non-NaN input over the NaN input, which adheres to the OpenCL specification. Note that the OpenCL specification doesn't require support for sNaN, so returning qNaN over sNaN is acceptable. Note also that the intrinsics don't differentiate between -0.0 and +0.0; distinguishing them does not appear to be required, going by the OpenCL CTS at least.

These intrinsics maintain the vector types, whereas the previous implementation scalarized the operations. This commit therefore helps to optimize codegen for targets that benefit from vector operations.

From 43d4d7d1fd03e3692c957ff4b7609cfe0b03a9ad Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 24 Feb 2025 12:25:22 +0000
Subject: [PATCH] [libclc] Move fmin/fmax to the CLC library

Note that the CLC versions of these builtins don't offer the
vector/scalar forms, for simplicity. The OpenCL layer converts the
vector/scalar forms to vector/vector calls.

The CLC builtins use clang's __builtin_elementwise_(min|max), which
helps us generate the llvm.(min|max)num intrinsics directly. These
intrinsics select the non-NaN input over the NaN input, which adheres to
the OpenCL specification. Note that the OpenCL specification doesn't
require support for sNaN, so returning qNaN over sNaN is acceptable.
Note also that the intrinsics don't differentiate between -0.0 and +0.0;
distinguishing them does not appear to be required, going by the OpenCL
CTS at least.

These intrinsics maintain the vector types, whereas the previous
implementation scalarized the operations. This commit therefore helps to
optimize codegen for targets that benefit from vector operations.
---
 .../clc/include/clc/math/binary_builtin.inc   | 27 +++++++++++++++++
 libclc/clc/include/clc/math/clc_fmax.h        | 12 ++++++++
 libclc/clc/include/clc/math/clc_fmin.h        | 12 ++++++++
 .../binary_decl_with_scalar_second_arg.inc    |  7 +++++
 .../binary_def_with_scalar_second_arg.inc     | 17 +++++++++++
 libclc/clc/lib/generic/SOURCES                |  4 ++-
 libclc/clc/lib/generic/math/clc_fmax.cl       |  6 ++++
 libclc/clc/lib/generic/math/clc_fmin.cl       |  6 ++++
 libclc/generic/lib/math/fmax.cl               | 29 ++-----------------
 libclc/generic/lib/math/fmin.cl               | 28 ++----------------
 10 files changed, 96 insertions(+), 52 deletions(-)
 create mode 100644 libclc/clc/include/clc/math/binary_builtin.inc
 create mode 100644 libclc/clc/include/clc/math/clc_fmax.h
 create mode 100644 libclc/clc/include/clc/math/clc_fmin.h
 create mode 100644 libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc
 create mode 100644 libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc
 create mode 100644 libclc/clc/lib/generic/math/clc_fmax.cl
 create mode 100644 libclc/clc/lib/generic/math/clc_fmin.cl

diff --git a/libclc/clc/include/clc/math/binary_builtin.inc b/libclc/clc/include/clc/math/binary_builtin.inc
new file mode 100644
index 0000000000000..da1d8d9cc92a6
--- /dev/null
+++ b/libclc/clc/include/clc/math/binary_builtin.inc
@@ -0,0 +1,27 @@
+#include <clc/clcmacro.h>
+#include <clc/utils.h>
+
+#ifndef __CLC_BUILTIN
+#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION)
+#endif
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __CLC_FUNCTION, __CLC_BUILTIN,
+                                        float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __CLC_FUNCTION, __CLC_BUILTIN,
+                                        double, double)
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __CLC_FUNCTION, __CLC_BUILTIN,
+                                        half, half)
+
+#endif
diff --git a/libclc/clc/include/clc/math/clc_fmax.h b/libclc/clc/include/clc/math/clc_fmax.h
new file mode 100644
index 0000000000000..4f13794beb399
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_fmax.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_FMAX_H__
+#define __CLC_MATH_CLC_FMAX_H__
+
+#define __CLC_FUNCTION __clc_fmax
+#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_FMAX_H__
diff --git a/libclc/clc/include/clc/math/clc_fmin.h b/libclc/clc/include/clc/math/clc_fmin.h
new file mode 100644
index 0000000000000..f8d0012098840
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_fmin.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_FMIN_H__
+#define __CLC_MATH_CLC_FMIN_H__
+
+#define __CLC_FUNCTION __clc_fmin
+#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_FMIN_H__
diff --git a/libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc b/libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc
new file mode 100644
index 0000000000000..bec8575d8cc24
--- /dev/null
+++ b/libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc
@@ -0,0 +1,7 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
+                                                     __CLC_GENTYPE y);
+
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
+                                                     __CLC_SCALAR_GENTYPE y);
+#endif
diff --git a/libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc b/libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc
new file mode 100644
index 0000000000000..c02f3ea0f7017
--- /dev/null
+++ b/libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc
@@ -0,0 +1,17 @@
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
+                                              __CLC_GENTYPE b) {
+  return __CLC_FUNCTION(FUNCTION)(a, b);
+}
+
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
+                                              __CLC_SCALAR_GENTYPE b) {
+  return __CLC_FUNCTION(FUNCTION)(a, (__CLC_GENTYPE)b);
+}
+#endif
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 0ab563df6a274..8d917461ccee0 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -20,8 +20,10 @@ integer/clc_upsample.cl
 math/clc_ceil.cl
 math/clc_copysign.cl
 math/clc_fabs.cl
-math/clc_fma.cl
 math/clc_floor.cl
+math/clc_fma.cl
+math/clc_fmax.cl
+math/clc_fmin.cl
 math/clc_frexp.cl
 math/clc_mad.cl
 math/clc_modf.cl
diff --git a/libclc/clc/lib/generic/math/clc_fmax.cl b/libclc/clc/lib/generic/math/clc_fmax.cl
new file mode 100644
index 0000000000000..82b1c5894d3ec
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_fmax
+#define __CLC_BUILTIN __builtin_elementwise_max
+#include <clc/math/binary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_fmin.cl b/libclc/clc/lib/generic/math/clc_fmin.cl
new file mode 100644
index 0000000000000..5411280835b1a
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_fmin
+#define __CLC_BUILTIN __builtin_elementwise_min
+#include <clc/math/binary_builtin.inc>
diff --git a/libclc/generic/lib/math/fmax.cl b/libclc/generic/lib/math/fmax.cl
index c42fe4f54a9e6..44a27b8b7bdec 100644
--- a/libclc/generic/lib/math/fmax.cl
+++ b/libclc/generic/lib/math/fmax.cl
@@ -1,31 +1,8 @@
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
+#include <clc/math/clc_fmax.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float);
+#define FUNCTION fmax
+#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
-{
-   if (isnan(x))
-      return y;
-   if (isnan(y))
-      return x;
-   return (x < y) ? y : x;
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
-
-#endif
-
-#define __CLC_BODY <fmax.inc>
 #include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/fmin.cl b/libclc/generic/lib/math/fmin.cl
index 55575d0486b60..40615ea89c9a0 100644
--- a/libclc/generic/lib/math/fmin.cl
+++ b/libclc/generic/lib/math/fmin.cl
@@ -1,30 +1,8 @@
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
+#include <clc/math/clc_fmin.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
+#define FUNCTION fmin
+#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
-{
-   if (isnan(x))
-      return y;
-   if (isnan(y))
-      return x;
-   return (y < x) ? y : x;
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
-
-#endif
-
-#define __CLC_BODY <fmin.inc>
 #include <clc/math/gentype.inc>