[libclc] 285b411 - [libclc] Move sqrt to CLC library (#128748)

Thu Feb 27 04:30:27 PST 2025

Author: Fraser Cormack
Date: 2025-02-27T12:30:24Z
New Revision: 285b411e4635e8db2526d653488ee54dad2bff34

URL: https://github.com/llvm/llvm-project/commit/285b411e4635e8db2526d653488ee54dad2bff34
DIFF: https://github.com/llvm/llvm-project/commit/285b411e4635e8db2526d653488ee54dad2bff34.diff

LOG: [libclc] Move sqrt to CLC library (#128748)

This is fairly straightforward for most targets.

We use the element-wise sqrt builtin by default. We also remove a legacy
pre-filtering of the input argument, which the intrinsic now officially
handles.

AMDGPU provides its own implementation of sqrt for double types. This
commit moves this into the implementation of CLC sqrt. It uses weak
linkage on the 'default' CLC sqrt to allow AMDGPU to only override the
builtin for the types it cares about.

Added: 
    libclc/clc/include/clc/math/clc_sqrt.h
    libclc/clc/lib/amdgpu/SOURCES
    libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
    libclc/clc/lib/generic/math/clc_sqrt.cl
    libclc/clc/lib/generic/math/clc_sqrt.inc

Modified: 
    libclc/CMakeLists.txt
    libclc/amdgpu/lib/SOURCES
    libclc/clc/include/clc/float/definitions.h
    libclc/clc/lib/generic/SOURCES
    libclc/generic/lib/SOURCES
    libclc/generic/lib/math/clc_hypot.cl
    libclc/generic/lib/math/sqrt.cl

Removed: 
    libclc/amdgpu/lib/math/sqrt.cl
    libclc/generic/include/math/clc_sqrt.h
    libclc/generic/lib/math/clc_sqrt.cl
    libclc/generic/lib/math/clc_sqrt_impl.inc


################################################################################
diff  --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index fb5f3638bc4e4..8f076be1599db 100644

--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -29,6 +29,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
   # CLC internal libraries
   clc/lib/generic/SOURCES;
   clc/lib/amdgcn/SOURCES;
+  clc/lib/amdgpu/SOURCES;
   clc/lib/clspv/SOURCES;
   clc/lib/spirv/SOURCES;
 )

diff  --git a/libclc/amdgpu/lib/SOURCES b/libclc/amdgpu/lib/SOURCES
index 24f099d049cd3..d7782a2ae14dc 100644
--- a/libclc/amdgpu/lib/SOURCES
+++ b/libclc/amdgpu/lib/SOURCES
@@ -10,4 +10,3 @@ math/half_log2.cl
 math/half_recip.cl
 math/half_rsqrt.cl
 math/half_sqrt.cl
-math/sqrt.cl

diff  --git a/libclc/clc/include/clc/float/definitions.h b/libclc/clc/include/clc/float/definitions.h
index 618d02ab1c090..82ae90155be1d 100644
--- a/libclc/clc/include/clc/float/definitions.h
+++ b/libclc/clc/include/clc/float/definitions.h
@@ -1,7 +1,6 @@
 #define MAXFLOAT 0x1.fffffep127f
 #define HUGE_VALF __builtin_huge_valf()
 #define INFINITY __builtin_inff()
-#define NAN __builtin_nanf("")
 
 #define FLT_DIG 6
 #define FLT_MANT_DIG 24
@@ -13,6 +12,7 @@
 #define FLT_MAX MAXFLOAT
 #define FLT_MIN 0x1.0p-126f
 #define FLT_EPSILON 0x1.0p-23f
+#define FLT_NAN __builtin_nanf("")
 
 #define FP_ILOGB0 (-2147483647 - 1)
 #define FP_ILOGBNAN 2147483647
@@ -46,6 +46,7 @@
 #define DBL_MAX 0x1.fffffffffffffp1023
 #define DBL_MIN 0x1.0p-1022
 #define DBL_EPSILON 0x1.0p-52
+#define DBL_NAN __builtin_nan("")
 
 #define M_E 0x1.5bf0a8b145769p+1
 #define M_LOG2E 0x1.71547652b82fep+0
@@ -80,6 +81,7 @@
 #define HALF_MAX 0x1.ffcp15h
 #define HALF_MIN 0x1.0p-14h
 #define HALF_EPSILON 0x1.0p-10h
+#define HALF_NAN __builtin_nanf16("")
 
 #define M_LOG2E_H 0x1.714p+0h
 

diff  --git a/libclc/generic/include/math/clc_sqrt.h b/libclc/clc/include/clc/math/clc_sqrt.h
similarity index 60%
rename from libclc/generic/include/math/clc_sqrt.h
rename to libclc/clc/include/clc/math/clc_sqrt.h
index 90a7c575c9bad..c16edf196d9f6 100644
--- a/libclc/generic/include/math/clc_sqrt.h
+++ b/libclc/clc/include/clc/math/clc_sqrt.h
@@ -1,8 +1,12 @@
-#include <clc/clcfunc.h>
-#include <clc/clctypes.h>
+#ifndef __CLC_MATH_CLC_SQRT_H__
+#define __CLC_MATH_CLC_SQRT_H__
 
-#define __CLC_FUNCTION __clc_sqrt
 #define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_sqrt
+
 #include <clc/math/gentype.inc>
+
 #undef __CLC_BODY
 #undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_SQRT_H__

diff  --git a/libclc/clc/lib/amdgpu/SOURCES b/libclc/clc/lib/amdgpu/SOURCES
new file mode 100644
index 0000000000000..fd64a862021e8
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/SOURCES
@@ -0,0 +1 @@
+math/clc_sqrt_fp64.cl

diff  --git a/libclc/amdgpu/lib/math/sqrt.cl b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
similarity index 64%
rename from libclc/amdgpu/lib/math/sqrt.cl
rename to libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
index 17d77e50d44d3..c5614659eb5ce 100644
--- a/libclc/amdgpu/lib/math/sqrt.cl
+++ b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
@@ -20,52 +20,43 @@
  * THE SOFTWARE.
  */
 
-#include "math/clc_sqrt.h"
-#include <clc/clc.h>
 #include <clc/clcmacro.h>
-
-_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
-
-#endif
+#include <clc/internal/clc.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_ldexp.h>
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 #ifdef __AMDGCN__
-  #define __clc_builtin_rsq __builtin_amdgcn_rsq
+#define __clc_builtin_rsq __builtin_amdgcn_rsq
 #else
-  #define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
+#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
 #endif
 
-_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {
-
+_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
   uint vcc = x < 0x1p-767;
   uint exp0 = vcc ? 0x100 : 0;
   unsigned exp1 = vcc ? 0xffffff80 : 0;
 
-  double v01 = ldexp(x, exp0);
+  double v01 = __clc_ldexp(x, exp0);
   double v23 = __clc_builtin_rsq(v01);
   double v45 = v01 * v23;
   v23 = v23 * 0.5;
 
-  double v67 = fma(-v23, v45, 0.5);
-  v45 = fma(v45, v67, v45);
-  double v89 = fma(-v45, v45, v01);
-  v23 = fma(v23, v67, v23);
-  v45 = fma(v89, v23, v45);
-  v67 = fma(-v45, v45, v01);
-  v23 = fma(v67, v23, v45);
+  double v67 = __clc_fma(-v23, v45, 0.5);
+  v45 = __clc_fma(v45, v67, v45);
+  double v89 = __clc_fma(-v45, v45, v01);
+  v23 = __clc_fma(v23, v67, v23);
+  v45 = __clc_fma(v89, v23, v45);
+  v67 = __clc_fma(-v45, v45, v01);
+  v23 = __clc_fma(v67, v23, v45);
 
-  v23 = ldexp(v23, exp1);
-  return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23;
+  v23 = __clc_ldexp(v23, exp1);
+  return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23;
 }
 
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double);
 
 #endif

diff  --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 1d16cd5e2d18a..97c504d21aa88 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -33,6 +33,7 @@ math/clc_nan.cl
 math/clc_nextafter.cl
 math/clc_rint.cl
 math/clc_round.cl
+math/clc_sqrt.cl
 math/clc_sw_fma.cl
 math/clc_trunc.cl
 relational/clc_all.cl

diff  --git a/libclc/generic/lib/math/clc_sqrt.cl b/libclc/clc/lib/generic/math/clc_sqrt.cl
similarity index 80%
rename from libclc/generic/lib/math/clc_sqrt.cl
rename to libclc/clc/lib/generic/math/clc_sqrt.cl
index 92c7f6e73b11e..620c367dd8510 100644
--- a/libclc/generic/lib/math/clc_sqrt.cl
+++ b/libclc/clc/lib/generic/math/clc_sqrt.cl
@@ -20,14 +20,8 @@
  * THE SOFTWARE.
  */
 
-#include <clc/clc.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
 
-// Map the llvm sqrt intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_llvm_intr_sqrt
-#define __CLC_INTRINSIC "llvm.sqrt"
-#include <clc/math/unary_intrin.inc>
-#undef __CLC_FUNCTION
-#undef __CLC_INTRINSIC
-
-#define __CLC_BODY <clc_sqrt_impl.inc>
+#define __CLC_BODY <clc_sqrt.inc>
 #include <clc/math/gentype.inc>

diff  --git a/libclc/generic/lib/math/clc_sqrt_impl.inc b/libclc/clc/lib/generic/math/clc_sqrt.inc
similarity index 75%
rename from libclc/generic/lib/math/clc_sqrt_impl.inc
rename to libclc/clc/lib/generic/math/clc_sqrt.inc
index fe724e8c14394..6c1c1bb7960e5 100644
--- a/libclc/generic/lib/math/clc_sqrt_impl.inc
+++ b/libclc/clc/lib/generic/math/clc_sqrt.inc
@@ -20,20 +20,7 @@
  * THE SOFTWARE.
  */
 
-#if __CLC_FPSIZE == 64
-#define __CLC_NAN __builtin_nan("")
-#define ZERO 0.0
-#elif __CLC_FPSIZE == 32
-#define __CLC_NAN NAN
-#define ZERO 0.0f
-#elif __CLC_FPSIZE == 16
-#define __CLC_NAN (half)NAN
-#define ZERO 0.0h
-#endif
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
-  return val < ZERO ? __CLC_NAN : __clc_llvm_intr_sqrt(val);
+__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+__clc_sqrt(__CLC_GENTYPE val) {
+  return __builtin_elementwise_sqrt(val);
 }
-
-#undef __CLC_NAN
-#undef ZERO

diff  --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index eb14fd84c96b3..5b1c85d6ef75b 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -179,7 +179,6 @@ math/sincos.cl
 math/sincos_helpers.cl
 math/sinh.cl
 math/sinpi.cl
-math/clc_sqrt.cl
 math/sqrt.cl
 math/clc_tan.cl
 math/tan.cl

diff  --git a/libclc/generic/lib/math/clc_hypot.cl b/libclc/generic/lib/math/clc_hypot.cl
index 5e6a99b70f22a..fdf1e7ffa1def 100644
--- a/libclc/generic/lib/math/clc_hypot.cl
+++ b/libclc/generic/lib/math/clc_hypot.cl
@@ -27,6 +27,7 @@
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
+#include <clc/math/clc_sqrt.h>
 #include <clc/relational/clc_isnan.h>
 #include <clc/shared/clc_clamp.h>
 #include <math/clc_hypot.h>
@@ -49,7 +50,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) {
   float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
   float fx = as_float(ux) * fi_exp;
   float fy = as_float(uy) * fi_exp;
-  retval = sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
+  retval = __clc_sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
 
   retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval;
   retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32
@@ -81,7 +82,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
   double ay = y * preadjust;
 
   // The post adjust may overflow, but this can't be avoided in any case
-  double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
+  double r = __clc_sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
 
   // If the 
diff erence in exponents between x and y is large
   double s = x + y;

diff  --git a/libclc/generic/lib/math/sqrt.cl b/libclc/generic/lib/math/sqrt.cl
index a9192a9493d17..d60d304fa1e1f 100644
--- a/libclc/generic/lib/math/sqrt.cl
+++ b/libclc/generic/lib/math/sqrt.cl
@@ -21,7 +21,9 @@
  */
 
 #include <clc/clc.h>
-#include "math/clc_sqrt.h"
+#include <clc/math/clc_sqrt.h>
 
-#define __CLC_FUNCTION sqrt
-#include <clc/math/unary_builtin.inc>
+#define FUNCTION sqrt
+#define __CLC_BODY <clc/shared/unary_def.inc>
+
+#include <clc/math/gentype.inc>