[libclc] 06789cc - [libclc] Optimize ceil/fabs/floor/rint/trunc (#119596)

Fri Dec 13 00:47:17 PST 2024

Author: Fraser Cormack
Date: 2024-12-13T08:47:13Z
New Revision: 06789ccb1695214f787cd471a300522973d33375

URL: https://github.com/llvm/llvm-project/commit/06789ccb1695214f787cd471a300522973d33375
DIFF: https://github.com/llvm/llvm-project/commit/06789ccb1695214f787cd471a300522973d33375.diff

LOG: [libclc] Optimize ceil/fabs/floor/rint/trunc (#119596)

These functions all map to the corresponding LLVM intrinsics, but the
vector intrinsics weren't being generated. The mapping from CLC vector
function to vector intrinsic was working correctly, but the mapping from
OpenCL builtin to CLC function was suboptimal: it recursively split
vectors in half.

For example, with this change, `ceil(float16)` calls `llvm.ceil.v16f32`
directly once optimizations are applied.

Also, instead of generating LLVM intrinsics through `__asm`, we now
call clang elementwise builtins for each CLC builtin. This should be a
more standard way of achieving the same result.

The CLC versions of each of these builtins are also now built and
enabled for SPIR-V targets. The LLVM -> SPIR-V translator maps the
intrinsics to the appropriate OpExtInst, so there should be no
difference in semantics, despite the newly introduced indirection from
OpenCL builtin through the CLC builtin to the intrinsic.

The AMDGPU targets make use of the same `_CLC_DEFINE_UNARY_BUILTIN`
macro to override `sqrt`, so those functions also appear more optimal
with this change, calling the vector `llvm.sqrt.vXf32` intrinsics
directly.

Added: 
    libclc/clc/include/clc/math/unary_builtin.inc
    libclc/clc/lib/generic/math/clc_ceil.cl
    libclc/clc/lib/generic/math/clc_fabs.cl
    libclc/clc/lib/generic/math/clc_floor.cl
    libclc/clc/lib/generic/math/clc_rint.cl
    libclc/clc/lib/generic/math/clc_trunc.cl

Modified: 
    libclc/clc/include/clc/clcmacro.h
    libclc/clc/include/clc/math/clc_ceil.h
    libclc/clc/include/clc/math/clc_fabs.h
    libclc/clc/include/clc/math/clc_floor.h
    libclc/clc/include/clc/math/clc_rint.h
    libclc/clc/include/clc/math/clc_trunc.h
    libclc/clc/lib/clspv/SOURCES
    libclc/clc/lib/generic/SOURCES
    libclc/clc/lib/spirv/SOURCES
    libclc/clc/lib/spirv64/SOURCES
    libclc/generic/lib/math/ceil.cl
    libclc/generic/lib/math/fabs.cl
    libclc/generic/lib/math/floor.cl
    libclc/generic/lib/math/rint.cl
    libclc/generic/lib/math/round.cl
    libclc/generic/lib/math/sqrt.cl
    libclc/generic/lib/math/trunc.cl

Removed: 
    libclc/clc/lib/clspv/dummy.cl
    libclc/generic/lib/math/unary_builtin.inc


################################################################################
diff  --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
index 244239284ecabc..c6583749eca661 100644

--- a/libclc/clc/include/clc/clcmacro.h
+++ b/libclc/clc/include/clc/clcmacro.h
@@ -191,7 +191,21 @@
 
 #define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE)      \
   _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { return BUILTIN(x); } \
-  _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) {                \
+    return BUILTIN(x);                                                         \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) {                \
+    return BUILTIN(x);                                                         \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) {                \
+    return BUILTIN(x);                                                         \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) {                \
+    return BUILTIN(x);                                                         \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) {              \
+    return BUILTIN(x);                                                         \
+  }
 
 #ifdef cl_khr_fp16
 

diff  --git a/libclc/clc/include/clc/math/clc_ceil.h b/libclc/clc/include/clc/math/clc_ceil.h
index 66590687c34220..20adc6d81d863f 100644
--- a/libclc/clc/include/clc/math/clc_ceil.h
+++ b/libclc/clc/include/clc/math/clc_ceil.h
@@ -1,19 +1,12 @@
 #ifndef __CLC_MATH_CLC_CEIL_H__
 #define __CLC_MATH_CLC_CEIL_H__
 
-#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
-// clspv and spir-v targets provide their own OpenCL-compatible ceil
-#define __clc_ceil ceil
-#else
-
-// Map the function to an LLVM intrinsic
+#define __CLC_BODY <clc/math/unary_decl.inc>
 #define __CLC_FUNCTION __clc_ceil
-#define __CLC_INTRINSIC "llvm.ceil"
-#include <clc/math/unary_intrin.inc>
 
-#undef __CLC_INTRINSIC
-#undef __CLC_FUNCTION
+#include <clc/math/gentype.inc>
 
-#endif
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
 
 #endif // __CLC_MATH_CLC_CEIL_H__

diff  --git a/libclc/clc/include/clc/math/clc_fabs.h b/libclc/clc/include/clc/math/clc_fabs.h
index 93367b57313713..911d34f78c7d25 100644
--- a/libclc/clc/include/clc/math/clc_fabs.h
+++ b/libclc/clc/include/clc/math/clc_fabs.h
@@ -1,19 +1,12 @@
 #ifndef __CLC_MATH_CLC_FABS_H__
 #define __CLC_MATH_CLC_FABS_H__
 
-#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
-// clspv and spir-v targets provide their own OpenCL-compatible fabs
-#define __clc_fabs fabs
-#else
-
-// Map the function to an LLVM intrinsic
+#define __CLC_BODY <clc/math/unary_decl.inc>
 #define __CLC_FUNCTION __clc_fabs
-#define __CLC_INTRINSIC "llvm.fabs"
-#include <clc/math/unary_intrin.inc>
 
-#undef __CLC_INTRINSIC
-#undef __CLC_FUNCTION
+#include <clc/math/gentype.inc>
 
-#endif
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
 
 #endif // __CLC_MATH_CLC_FABS_H__

diff  --git a/libclc/clc/include/clc/math/clc_floor.h b/libclc/clc/include/clc/math/clc_floor.h
index 9919872ec633c6..c311cc0edae151 100644
--- a/libclc/clc/include/clc/math/clc_floor.h
+++ b/libclc/clc/include/clc/math/clc_floor.h
@@ -1,19 +1,12 @@
 #ifndef __CLC_MATH_CLC_FLOOR_H__
 #define __CLC_MATH_CLC_FLOOR_H__
 
-#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
-// clspv and spir-v targets provide their own OpenCL-compatible floor
-#define __clc_floor floor
-#else
-
-// Map the function to an LLVM intrinsic
+#define __CLC_BODY <clc/math/unary_decl.inc>
 #define __CLC_FUNCTION __clc_floor
-#define __CLC_INTRINSIC "llvm.floor"
-#include <clc/math/unary_intrin.inc>
 
-#undef __CLC_INTRINSIC
-#undef __CLC_FUNCTION
+#include <clc/math/gentype.inc>
 
-#endif
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
 
 #endif // __CLC_MATH_CLC_FLOOR_H__

diff  --git a/libclc/clc/include/clc/math/clc_rint.h b/libclc/clc/include/clc/math/clc_rint.h
index 3761407ad326d7..6faeed0b5696e5 100644
--- a/libclc/clc/include/clc/math/clc_rint.h
+++ b/libclc/clc/include/clc/math/clc_rint.h
@@ -1,19 +1,12 @@
 #ifndef __CLC_MATH_CLC_RINT_H__
 #define __CLC_MATH_CLC_RINT_H__
 
-#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
-// clspv and spir-v targets provide their own OpenCL-compatible rint
-#define __clc_rint rint
-#else
-
-// Map the function to an LLVM intrinsic
+#define __CLC_BODY <clc/math/unary_decl.inc>
 #define __CLC_FUNCTION __clc_rint
-#define __CLC_INTRINSIC "llvm.rint"
-#include <clc/math/unary_intrin.inc>
 
-#undef __CLC_INTRINSIC
-#undef __CLC_FUNCTION
+#include <clc/math/gentype.inc>
 
-#endif
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
 
 #endif // __CLC_MATH_CLC_RINT_H__

diff  --git a/libclc/clc/include/clc/math/clc_trunc.h b/libclc/clc/include/clc/math/clc_trunc.h
index c78c8899d85238..acfc9d5db48117 100644
--- a/libclc/clc/include/clc/math/clc_trunc.h
+++ b/libclc/clc/include/clc/math/clc_trunc.h
@@ -1,19 +1,12 @@
 #ifndef __CLC_MATH_CLC_TRUNC_H__
 #define __CLC_MATH_CLC_TRUNC_H__
 
-#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
-// clspv and spir-v targets provide their own OpenCL-compatible trunc
-#define __clc_trunc trunc
-#else
-
-// Map the function to an LLVM intrinsic
+#define __CLC_BODY <clc/math/unary_decl.inc>
 #define __CLC_FUNCTION __clc_trunc
-#define __CLC_INTRINSIC "llvm.trunc"
-#include <clc/math/unary_intrin.inc>
 
-#undef __CLC_INTRINSIC
-#undef __CLC_FUNCTION
+#include <clc/math/gentype.inc>
 
-#endif
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
 
 #endif // __CLC_MATH_CLC_TRUNC_H__

diff  --git a/libclc/generic/lib/math/unary_builtin.inc b/libclc/clc/include/clc/math/unary_builtin.inc
similarity index 100%
rename from libclc/generic/lib/math/unary_builtin.inc
rename to libclc/clc/include/clc/math/unary_builtin.inc

diff  --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
index 75a3130357c345..393e8d773cda0e 100644
--- a/libclc/clc/lib/clspv/SOURCES
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -1 +1,5 @@
-dummy.cl
+../generic/math/clc_ceil.cl
+../generic/math/clc_fabs.cl
+../generic/math/clc_floor.cl
+../generic/math/clc_rint.cl
+../generic/math/clc_trunc.cl

diff  --git a/libclc/clc/lib/clspv/dummy.cl b/libclc/clc/lib/clspv/dummy.cl
deleted file mode 100644
index fab17ac780e375..00000000000000
--- a/libclc/clc/lib/clspv/dummy.cl
+++ /dev/null
@@ -1 +0,0 @@
-// Empty file

diff  --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index d7ffaaf6dc3f42..3916ea15f5c458 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -1,6 +1,11 @@
 geometric/clc_dot.cl
 integer/clc_abs.cl
 integer/clc_abs_diff.cl
+math/clc_ceil.cl
+math/clc_fabs.cl
+math/clc_floor.cl
+math/clc_rint.cl
+math/clc_trunc.cl
 relational/clc_all.cl
 relational/clc_any.cl
 relational/clc_bitselect.cl

diff  --git a/libclc/clc/lib/generic/math/clc_ceil.cl b/libclc/clc/lib/generic/math/clc_ceil.cl
new file mode 100644
index 00000000000000..c712e5fd024d90
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_ceil.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_ceil
+#define __CLC_BUILTIN __builtin_elementwise_ceil
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/clc/lib/generic/math/clc_fabs.cl b/libclc/clc/lib/generic/math/clc_fabs.cl
new file mode 100644
index 00000000000000..23ff3a7a187e1d
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_fabs.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_fabs
+#define __CLC_BUILTIN __builtin_elementwise_abs
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/clc/lib/generic/math/clc_floor.cl b/libclc/clc/lib/generic/math/clc_floor.cl
new file mode 100644
index 00000000000000..98345c768f2271
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_floor.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_floor
+#define __CLC_BUILTIN __builtin_elementwise_floor
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/clc/lib/generic/math/clc_rint.cl b/libclc/clc/lib/generic/math/clc_rint.cl
new file mode 100644
index 00000000000000..28ad321a7b4f6d
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_rint.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_rint
+#define __CLC_BUILTIN __builtin_elementwise_rint
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/clc/lib/generic/math/clc_trunc.cl b/libclc/clc/lib/generic/math/clc_trunc.cl
new file mode 100644
index 00000000000000..e62ae062e05020
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_trunc.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_trunc
+#define __CLC_BUILTIN __builtin_elementwise_trunc
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
index d8effd19613c8b..3b29fa0a916243 100644
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -1,2 +1,6 @@
 ../generic/geometric/clc_dot.cl
-
+../generic/math/clc_ceil.cl
+../generic/math/clc_fabs.cl
+../generic/math/clc_floor.cl
+../generic/math/clc_rint.cl
+../generic/math/clc_trunc.cl

diff  --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES
index 9200810ace38e7..3b29fa0a916243 100644
--- a/libclc/clc/lib/spirv64/SOURCES
+++ b/libclc/clc/lib/spirv64/SOURCES
@@ -1 +1,6 @@
 ../generic/geometric/clc_dot.cl
+../generic/math/clc_ceil.cl
+../generic/math/clc_fabs.cl
+../generic/math/clc_floor.cl
+../generic/math/clc_rint.cl
+../generic/math/clc_trunc.cl

diff  --git a/libclc/generic/lib/math/ceil.cl b/libclc/generic/lib/math/ceil.cl
index e02789e694e06e..8df864a06314d8 100644
--- a/libclc/generic/lib/math/ceil.cl
+++ b/libclc/generic/lib/math/ceil.cl
@@ -4,4 +4,4 @@
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION ceil
-#include "unary_builtin.inc"
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/generic/lib/math/fabs.cl b/libclc/generic/lib/math/fabs.cl
index 9644369d4a0953..55701cb36a9512 100644
--- a/libclc/generic/lib/math/fabs.cl
+++ b/libclc/generic/lib/math/fabs.cl
@@ -4,4 +4,4 @@
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION fabs
-#include "unary_builtin.inc"
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/generic/lib/math/floor.cl b/libclc/generic/lib/math/floor.cl
index f5c36b73862a46..0854fa7efc4580 100644
--- a/libclc/generic/lib/math/floor.cl
+++ b/libclc/generic/lib/math/floor.cl
@@ -4,4 +4,4 @@
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION floor
-#include "unary_builtin.inc"
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/generic/lib/math/rint.cl b/libclc/generic/lib/math/rint.cl
index 185bbbbf8c91d2..ecf7d5c1e6dde8 100644
--- a/libclc/generic/lib/math/rint.cl
+++ b/libclc/generic/lib/math/rint.cl
@@ -3,4 +3,4 @@
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION rint
-#include "unary_builtin.inc"
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/generic/lib/math/round.cl b/libclc/generic/lib/math/round.cl
index 285328aaa5d563..6344051820c798 100644
--- a/libclc/generic/lib/math/round.cl
+++ b/libclc/generic/lib/math/round.cl
@@ -7,4 +7,4 @@
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION round
-#include "unary_builtin.inc"
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/generic/lib/math/sqrt.cl b/libclc/generic/lib/math/sqrt.cl
index 8df25dd45adb67..a9192a9493d172 100644
--- a/libclc/generic/lib/math/sqrt.cl
+++ b/libclc/generic/lib/math/sqrt.cl
@@ -24,4 +24,4 @@
 #include "math/clc_sqrt.h"
 
 #define __CLC_FUNCTION sqrt
-#include "unary_builtin.inc"
+#include <clc/math/unary_builtin.inc>

diff  --git a/libclc/generic/lib/math/trunc.cl b/libclc/generic/lib/math/trunc.cl
index 00c2a4a80015fe..1d5f04a3230541 100644
--- a/libclc/generic/lib/math/trunc.cl
+++ b/libclc/generic/lib/math/trunc.cl
@@ -3,4 +3,4 @@
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION trunc
-#include "unary_builtin.inc"
+#include <clc/math/unary_builtin.inc>