[libclc] b7e2014 - [libclc] Move smoothstep to CLC and optimize its codegen (#123183)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 16 03:44:13 PST 2025
Author: Fraser Cormack
Date: 2025-01-16T11:44:09Z
New Revision: b7e20147ad7c29f9624d2a071bd348a7acd63461
URL: https://github.com/llvm/llvm-project/commit/b7e20147ad7c29f9624d2a071bd348a7acd63461
DIFF: https://github.com/llvm/llvm-project/commit/b7e20147ad7c29f9624d2a071bd348a7acd63461.diff
LOG: [libclc] Move smoothstep to CLC and optimize its codegen (#123183)
This commit moves the implementation of the smoothstep function to the
CLC library, whilst optimizing the codegen.
This commit also adds support for 'half' versions of smoothstep, which
were previously missing.
The CLC smoothstep implementation now keeps everything in vectors,
rather than recursively splitting vectors by half down to the scalar
base form. This should result in more optimal codegen across the board.
This commit also removes some non-standard overloads of smoothstep with
mixed types, such as 'double smoothstep(float, float, float)'. There
aren't any mixed-(element-)type versions of smoothstep as far as I can
see:
gentype smoothstep(gentype edge0, gentype edge1, gentype x)
gentypef smoothstep(float edge0, float edge1, gentypef x)
gentyped smoothstep(double edge0, double edge1, gentyped x)
gentypeh smoothstep(half edge0, half edge1, gentypeh x)
The CLC library only defines the first type, for simplicity; the OpenCL
layer is responsible for handling the scalar/scalar/vector forms. Note
that the scalar/scalar/vector forms now splat the scalars to the vector
type, rather than recursively split vectors as before. The macro that
used to 'vectorize' smoothstep in this way has been moved out of the
shared clcmacro.h header as it was only used for the smoothstep builtin.
Note that the CLC clamp function is now built for both SPIR-V targets.
This is to help build the CLC smoothstep function for the Mesa SPIR-V
target.
Added:
libclc/clc/include/clc/common/clc_smoothstep.h
libclc/clc/include/clc/common/clc_smoothstep.inc
libclc/clc/lib/generic/common/clc_smoothstep.cl
Modified:
libclc/clc/include/clc/clcmacro.h
libclc/clc/include/clc/shared/clc_clamp.h
libclc/clc/lib/clspv/SOURCES
libclc/clc/lib/generic/SOURCES
libclc/clc/lib/spirv/SOURCES
libclc/clc/lib/spirv64/SOURCES
libclc/generic/lib/common/smoothstep.cl
Removed:
################################################################################
diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
index c6583749eca661..3c3a69f4f848bb 100644
--- a/libclc/clc/include/clc/clcmacro.h
+++ b/libclc/clc/include/clc/clcmacro.h
@@ -102,29 +102,6 @@
FUNCTION(x.hi, y.hi, z.hi)); \
}
-#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
- ARG2_TYPE, ARG3_TYPE) \
- DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
- return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
- } \
- \
- DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \
- return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \
- FUNCTION(x, y, z.z)); \
- } \
- \
- DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \
- return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
- } \
- \
- DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \
- return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
- } \
- \
- DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \
- return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
- }
-
#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ADDR_SPACE, ARG2_TYPE) \
DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
diff --git a/libclc/clc/include/clc/common/clc_smoothstep.h b/libclc/clc/include/clc/common/clc_smoothstep.h
new file mode 100644
index 00000000000000..fa212245e07946
--- /dev/null
+++ b/libclc/clc/include/clc/common/clc_smoothstep.h
@@ -0,0 +1,11 @@
+#ifndef __CLC_COMMON_CLC_SMOOTHSTEP_H__
+#define __CLC_COMMON_CLC_SMOOTHSTEP_H__
+
+// note: Unlike OpenCL __clc_smoothstep is only defined for three matching
+// argument types.
+
+#define __CLC_BODY <clc/common/clc_smoothstep.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
+
+#endif // __CLC_COMMON_CLC_SMOOTHSTEP_H__
diff --git a/libclc/clc/include/clc/common/clc_smoothstep.inc b/libclc/clc/include/clc/common/clc_smoothstep.inc
new file mode 100644
index 00000000000000..3ce33c5573f6c1
--- /dev/null
+++ b/libclc/clc/include/clc/common/clc_smoothstep.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_smoothstep(__CLC_GENTYPE edge0,
+ __CLC_GENTYPE edge1,
+ __CLC_GENTYPE x);
diff --git a/libclc/clc/include/clc/shared/clc_clamp.h b/libclc/clc/include/clc/shared/clc_clamp.h
index d9d39413c5618d..7fd22771c09c03 100644
--- a/libclc/clc/include/clc/shared/clc_clamp.h
+++ b/libclc/clc/include/clc/shared/clc_clamp.h
@@ -1,17 +1,10 @@
#ifndef __CLC_SHARED_CLC_CLAMP_H__
#define __CLC_SHARED_CLC_CLAMP_H__
-#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
-// clspv and spir-v targets provide their own OpenCL-compatible clamp
-#define __clc_clamp clamp
-#else
-
#define __CLC_BODY <clc/shared/clc_clamp.inc>
#include <clc/integer/gentype.inc>
#define __CLC_BODY <clc/shared/clc_clamp.inc>
#include <clc/math/gentype.inc>
-#endif
-
#endif // __CLC_SHARED_CLC_CLAMP_H__
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
index 393e8d773cda0e..e6573f586080cf 100644
--- a/libclc/clc/lib/clspv/SOURCES
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -3,3 +3,4 @@
../generic/math/clc_floor.cl
../generic/math/clc_rint.cl
../generic/math/clc_trunc.cl
+../generic/shared/clc_clamp.cl
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 3916ea15f5c458..f3097de6944221 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -1,3 +1,4 @@
+common/clc_smoothstep.cl
geometric/clc_dot.cl
integer/clc_abs.cl
integer/clc_abs_diff.cl
diff --git a/libclc/clc/lib/generic/common/clc_smoothstep.cl b/libclc/clc/lib/generic/common/clc_smoothstep.cl
new file mode 100644
index 00000000000000..ea0e9ed3bb19c5
--- /dev/null
+++ b/libclc/clc/lib/generic/common/clc_smoothstep.cl
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <clc/clcmacro.h>
+#include <clc/internal/clc.h>
+#include <clc/shared/clc_clamp.h>
+
+#define SMOOTHSTEP_SINGLE_DEF(edge_type, x_type, lit_suff) \
+ _CLC_OVERLOAD _CLC_DEF x_type __clc_smoothstep(edge_type edge0, \
+ edge_type edge1, x_type x) { \
+ x_type t = __clc_clamp((x - edge0) / (edge1 - edge0), 0.0##lit_suff, \
+ 1.0##lit_suff); \
+ return t * t * (3.0##lit_suff - 2.0##lit_suff * t); \
+ }
+
+#define SMOOTHSTEP_DEF(type, lit_suffix) \
+ SMOOTHSTEP_SINGLE_DEF(type, type, lit_suffix) \
+ SMOOTHSTEP_SINGLE_DEF(type##2, type##2, lit_suffix) \
+ SMOOTHSTEP_SINGLE_DEF(type##3, type##3, lit_suffix) \
+ SMOOTHSTEP_SINGLE_DEF(type##4, type##4, lit_suffix) \
+ SMOOTHSTEP_SINGLE_DEF(type##8, type##8, lit_suffix) \
+ SMOOTHSTEP_SINGLE_DEF(type##16, type##16, lit_suffix)
+
+SMOOTHSTEP_DEF(float, F)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+SMOOTHSTEP_DEF(double, );
+#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+SMOOTHSTEP_DEF(half, H);
+#endif
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
index 3b29fa0a916243..02784b8def682b 100644
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -1,6 +1,8 @@
+../generic/common/clc_smoothstep.cl
../generic/geometric/clc_dot.cl
../generic/math/clc_ceil.cl
../generic/math/clc_fabs.cl
../generic/math/clc_floor.cl
../generic/math/clc_rint.cl
../generic/math/clc_trunc.cl
+../generic/shared/clc_clamp.cl
diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES
index 3b29fa0a916243..02784b8def682b 100644
--- a/libclc/clc/lib/spirv64/SOURCES
+++ b/libclc/clc/lib/spirv64/SOURCES
@@ -1,6 +1,8 @@
+../generic/common/clc_smoothstep.cl
../generic/geometric/clc_dot.cl
../generic/math/clc_ceil.cl
../generic/math/clc_fabs.cl
../generic/math/clc_floor.cl
../generic/math/clc_rint.cl
../generic/math/clc_trunc.cl
+../generic/shared/clc_clamp.cl
diff --git a/libclc/generic/lib/common/smoothstep.cl b/libclc/generic/lib/common/smoothstep.cl
index 4cdecfc4abe26e..78d62044f439b8 100644
--- a/libclc/generic/lib/common/smoothstep.cl
+++ b/libclc/generic/lib/common/smoothstep.cl
@@ -22,35 +22,61 @@
#include <clc/clc.h>
#include <clc/clcmacro.h>
+#include <clc/common/clc_smoothstep.h>
-_CLC_OVERLOAD _CLC_DEF float smoothstep(float edge0, float edge1, float x) {
- float t = clamp((x - edge0) / (edge1 - edge0), 0.0f, 1.0f);
- return t * t * (3.0f - 2.0f * t);
-}
+#define SMOOTHSTEP_SINGLE_DEF(X_TYPE) \
+ _CLC_OVERLOAD _CLC_DEF X_TYPE smoothstep(X_TYPE edge0, X_TYPE edge1, \
+ X_TYPE x) { \
+ return __clc_smoothstep(edge0, edge1, x); \
+ }
-_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, smoothstep, float, float, float);
+#define SMOOTHSTEP_S_S_V_DEFS(X_TYPE) \
+ _CLC_OVERLOAD _CLC_DEF X_TYPE##2 smoothstep(X_TYPE x, X_TYPE y, \
+ X_TYPE##2 z) { \
+ return __clc_smoothstep((X_TYPE##2)x, (X_TYPE##2)y, z); \
+ } \
+ \
+ _CLC_OVERLOAD _CLC_DEF X_TYPE##3 smoothstep(X_TYPE x, X_TYPE y, \
+ X_TYPE##3 z) { \
+ return __clc_smoothstep((X_TYPE##3)x, (X_TYPE##3)y, z); \
+ } \
+ \
+ _CLC_OVERLOAD _CLC_DEF X_TYPE##4 smoothstep(X_TYPE x, X_TYPE y, \
+ X_TYPE##4 z) { \
+ return __clc_smoothstep((X_TYPE##4)x, (X_TYPE##4)y, z); \
+ } \
+ \
+ _CLC_OVERLOAD _CLC_DEF X_TYPE##8 smoothstep(X_TYPE x, X_TYPE y, \
+ X_TYPE##8 z) { \
+ return __clc_smoothstep((X_TYPE##8)x, (X_TYPE##8)y, z); \
+ } \
+ \
+ _CLC_OVERLOAD _CLC_DEF X_TYPE##16 smoothstep(X_TYPE x, X_TYPE y, \
+ X_TYPE##16 z) { \
+ return __clc_smoothstep((X_TYPE##16)x, (X_TYPE##16)y, z); \
+ }
-_CLC_V_S_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, smoothstep, float, float, float);
+#define SMOOTHSTEP_DEF(type) \
+ SMOOTHSTEP_SINGLE_DEF(type) \
+ SMOOTHSTEP_SINGLE_DEF(type##2) \
+ SMOOTHSTEP_SINGLE_DEF(type##3) \
+ SMOOTHSTEP_SINGLE_DEF(type##4) \
+ SMOOTHSTEP_SINGLE_DEF(type##8) \
+ SMOOTHSTEP_SINGLE_DEF(type##16) \
+ SMOOTHSTEP_S_S_V_DEFS(type)
+
+SMOOTHSTEP_DEF(float)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-#define SMOOTH_STEP_DEF(edge_type, x_type, impl) \
- _CLC_OVERLOAD _CLC_DEF x_type smoothstep(edge_type edge0, edge_type edge1, x_type x) { \
- double t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0); \
- return t * t * (3.0 - 2.0 * t); \
- }
-
-SMOOTH_STEP_DEF(double, double, SMOOTH_STEP_IMPL_D);
+SMOOTHSTEP_DEF(double);
-_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, double, double, double);
+#endif
-#if !defined(CLC_SPIRV)
-SMOOTH_STEP_DEF(float, double, SMOOTH_STEP_IMPL_D);
-SMOOTH_STEP_DEF(double, float, SMOOTH_STEP_IMPL_D);
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_V_S_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, float, float, double);
-_CLC_V_S_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, smoothstep, double, double, float);
-#endif
+SMOOTHSTEP_DEF(half);
#endif
More information about the cfe-commits
mailing list