[libclc] b2bdd8b - [libclc] Create an internal 'clc' builtins library
Fraser Cormack via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 29 06:10:55 PDT 2024
Author: Fraser Cormack
Date: 2024-10-29T13:09:56Z
New Revision: b2bdd8bd39e90bfe3c66f6d5600468570a77ede6
URL: https://github.com/llvm/llvm-project/commit/b2bdd8bd39e90bfe3c66f6d5600468570a77ede6
DIFF: https://github.com/llvm/llvm-project/commit/b2bdd8bd39e90bfe3c66f6d5600468570a77ede6.diff
LOG: [libclc] Create an internal 'clc' builtins library
Some libclc builtins currently use internal builtins prefixed with
'__clc_' for various reasons, e.g., to avoid naming clashes.
This commit formalizes this concept by starting to isolate the
definitions of these internal clc builtins into a separate
self-contained bytecode library, which is linked into each target's
libclc OpenCL builtins before optimization takes place.
The goal of this step is to allow additional libraries of builtins
that provide entry points (or bindings) that are not written in OpenCL C
but still wish to expose OpenCL-compatible builtins. By moving the
implementations into a separate self-contained library, entry points can
share as much code as possible without going through OpenCL C.
The overall structure of the internal clc library is similar to the
current OpenCL structure, with SOURCES files and targets being able to
override the definitions of builtins as needed. The idea is that the
OpenCL builtins will begin to need fewer target-specific overrides, as
those will slowly move over to the clc builtins instead.
Another advantage of having a separate bytecode library with the CLC
implementations is that we can internalize the symbols when linking it
(separately), whereas currently the CLC symbols make it into the final
builtins library (and perhaps even the final compiled binary).
This patch starts off with 'dot' as it's relatively self-contained, as
opposed to most of the maths builtins which tend to pull in other
builtins.
We can also start to clang-format the builtins as we go, which should
help to modernize the codebase.
Added:
libclc/clc/include/clc/clcfunc.h
libclc/clc/include/clc/clctypes.h
libclc/clc/include/clc/geometric/clc_dot.h
libclc/clc/include/clc/geometric/clc_dot.inc
libclc/clc/include/clc/internal/clc.h
libclc/clc/lib/clspv/SOURCES
libclc/clc/lib/clspv/dummy.cl
libclc/clc/lib/clspv64
libclc/clc/lib/generic/SOURCES
libclc/clc/lib/generic/geometric/clc_dot.cl
libclc/clc/lib/spirv/SOURCES
libclc/clc/lib/spirv64/SOURCES
Modified:
libclc/CMakeLists.txt
libclc/cmake/modules/AddLibclc.cmake
libclc/generic/lib/geometric/dot.cl
Removed:
libclc/generic/include/clc/clcfunc.h
libclc/generic/include/clc/clctypes.h
################################################################################
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index ba04c0bc8618f2..16d74e53295cc1 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -29,7 +29,13 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
ptx-nvidiacl/lib/SOURCES;
r600/lib/SOURCES;
spirv/lib/SOURCES;
- spirv64/lib/SOURCES
+ spirv64/lib/SOURCES;
+ # CLC internal libraries
+ clc/lib/generic/SOURCES;
+ clc/lib/clspv/SOURCES;
+ clc/lib/clspv64/SOURCES;
+ clc/lib/spirv/SOURCES;
+ clc/lib/spirv64/SOURCES;
)
set( LIBCLC_MIN_LLVM 3.9.0 )
@@ -278,6 +284,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
set( DARCH ${ARCH} )
endif()
+ set( clc_lib_files )
+ libclc_configure_lib_source(
+ clc_lib_files
+ CLC_INTERNAL
+ LIB_ROOT_DIR clc
+ DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS}
+ )
+
set( opencl_lib_files )
set( opencl_gen_files )
@@ -326,7 +340,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
list( APPEND build_flags
-D__CLC_INTERNAL
-D${CLC_TARGET_DEFINE}
- -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include
+ # All libclc builtin libraries see CLC headers
+ -I${CMAKE_CURRENT_SOURCE_DIR}/clc/include
# FIXME: Fix libclc to not require disabling this noisy warning
-Wno-bitwise-conditional-parentheses
)
@@ -335,6 +350,20 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
list( APPEND build_flags -mcpu=${cpu} )
endif()
+ add_libclc_builtin_set(
+ CLC_INTERNAL
+ ARCH ${ARCH}
+ ARCH_SUFFIX clc-${arch_suffix}
+ TRIPLE ${clang_triple}
+ COMPILE_FLAGS ${build_flags}
+ OPT_FLAGS ${opt_flags}
+ LIB_FILES ${clc_lib_files}
+ )
+
+ list( APPEND build_flags
+ -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include
+ )
+
add_libclc_builtin_set(
ARCH ${ARCH}
ARCH_SUFFIX ${arch_suffix}
@@ -344,6 +373,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
LIB_FILES ${opencl_lib_files}
GEN_FILES ${opencl_gen_files}
ALIASES ${${d}_aliases}
+ # Link in the CLC builtins and internalize their symbols
+ INTERNAL_LINK_DEPENDENCIES $<TARGET_PROPERTY:builtins.link.clc-${arch_suffix},TARGET_FILE>
)
endforeach( d )
endforeach( t )
diff --git a/libclc/generic/include/clc/clcfunc.h b/libclc/clc/include/clc/clcfunc.h
similarity index 85%
rename from libclc/generic/include/clc/clcfunc.h
rename to libclc/clc/include/clc/clcfunc.h
index 086d780b970859..fe3406f64fecb8 100644
--- a/libclc/generic/include/clc/clcfunc.h
+++ b/libclc/clc/include/clc/clcfunc.h
@@ -1,3 +1,6 @@
+#ifndef __CLC_CLCFUNC_H_
+#define __CLC_CLCFUNC_H_
+
#define _CLC_OVERLOAD __attribute__((overloadable))
#define _CLC_DECL
#define _CLC_INLINE __attribute__((always_inline)) inline
@@ -11,3 +14,5 @@
#else
#define _CLC_DEF __attribute__((always_inline))
#endif
+
+#endif // __CLC_CLCFUNC_H_
diff --git a/libclc/generic/include/clc/clctypes.h b/libclc/clc/include/clc/clctypes.h
similarity index 94%
rename from libclc/generic/include/clc/clctypes.h
rename to libclc/clc/include/clc/clctypes.h
index 76b816d395c288..8ededd967e0033 100644
--- a/libclc/generic/include/clc/clctypes.h
+++ b/libclc/clc/include/clc/clctypes.h
@@ -1,3 +1,6 @@
+#ifndef __CLC_CLCTYPES_H_
+#define __CLC_CLCTYPES_H_
+
/* 6.1.1 Built-in Scalar Data Types */
typedef unsigned char uchar;
@@ -8,12 +11,12 @@ typedef unsigned long ulong;
typedef __SIZE_TYPE__ size_t;
typedef __PTRDIFF_TYPE__ ptrdiff_t;
-#define __stdint_join3(a,b,c) a ## b ## c
+#define __stdint_join3(a, b, c) a##b##c
-#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__)
+#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__)
#define __uintn_t(n) __stdint_join3(unsigned __INT, n, _TYPE__)
-typedef __intn_t(__INTPTR_WIDTH__) intptr_t;
+typedef __intn_t(__INTPTR_WIDTH__) intptr_t;
typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t;
#undef __uintn_t
@@ -93,3 +96,5 @@ typedef __attribute__((ext_vector_type(4))) half half4;
typedef __attribute__((ext_vector_type(8))) half half8;
typedef __attribute__((ext_vector_type(16))) half half16;
#endif
+
+#endif // __CLC_CLCTYPES_H_
diff --git a/libclc/clc/include/clc/geometric/clc_dot.h b/libclc/clc/include/clc/geometric/clc_dot.h
new file mode 100644
index 00000000000000..e0e47ab2093efd
--- /dev/null
+++ b/libclc/clc/include/clc/geometric/clc_dot.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/geometric/clc_dot.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/clc/include/clc/geometric/clc_dot.inc b/libclc/clc/include/clc/geometric/clc_dot.inc
new file mode 100644
index 00000000000000..016b564df362d2
--- /dev/null
+++ b/libclc/clc/include/clc/geometric/clc_dot.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT __clc_dot(__CLC_FLOATN p0, __CLC_FLOATN p1);
diff --git a/libclc/clc/include/clc/internal/clc.h b/libclc/clc/include/clc/internal/clc.h
new file mode 100644
index 00000000000000..c3bdfd754105f7
--- /dev/null
+++ b/libclc/clc/include/clc/internal/clc.h
@@ -0,0 +1,26 @@
+#ifndef __CLC_INTERNAL_CLC_H_
+#define __CLC_INTERNAL_CLC_H_
+
+#ifndef cl_clang_storage_class_specifiers
+#error Implementation requires cl_clang_storage_class_specifiers extension!
+#endif
+
+#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif
+
+/* Function Attributes */
+#include <clc/clcfunc.h>
+
+/* 6.1 Supported Data Types */
+#include <clc/clctypes.h>
+
+#pragma OPENCL EXTENSION all : disable
+
+#endif // __CLC_INTERNAL_CLC_H_
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
new file mode 100644
index 00000000000000..75a3130357c345
--- /dev/null
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -0,0 +1 @@
+dummy.cl
diff --git a/libclc/clc/lib/clspv/dummy.cl b/libclc/clc/lib/clspv/dummy.cl
new file mode 100644
index 00000000000000..fab17ac780e375
--- /dev/null
+++ b/libclc/clc/lib/clspv/dummy.cl
@@ -0,0 +1 @@
+// Empty file
diff --git a/libclc/clc/lib/clspv64 b/libclc/clc/lib/clspv64
new file mode 120000
index 00000000000000..ea01ba94bc6368
--- /dev/null
+++ b/libclc/clc/lib/clspv64
@@ -0,0 +1 @@
+clspv
\ No newline at end of file
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
new file mode 100644
index 00000000000000..fa2e4f50b99cd7
--- /dev/null
+++ b/libclc/clc/lib/generic/SOURCES
@@ -0,0 +1 @@
+geometric/clc_dot.cl
diff --git a/libclc/clc/lib/generic/geometric/clc_dot.cl b/libclc/clc/lib/generic/geometric/clc_dot.cl
new file mode 100644
index 00000000000000..bf0f19b51bc05e
--- /dev/null
+++ b/libclc/clc/lib/generic/geometric/clc_dot.cl
@@ -0,0 +1,57 @@
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; }
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) {
+ return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) {
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) {
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) {
+ return p0 * p1;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) {
+ return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) {
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) {
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; }
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) {
+ return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) {
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) {
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#endif
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
new file mode 100644
index 00000000000000..d8effd19613c8b
--- /dev/null
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -0,0 +1,2 @@
+../generic/geometric/clc_dot.cl
+
diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES
new file mode 100644
index 00000000000000..9200810ace38e7
--- /dev/null
+++ b/libclc/clc/lib/spirv64/SOURCES
@@ -0,0 +1 @@
+../generic/geometric/clc_dot.cl
diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake
index 147f06bc9a9afc..ee7c8500c8359f 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -76,6 +76,8 @@ endfunction()
# Links together one or more bytecode files
#
# Arguments:
+# * INTERNALIZE
+# Set if -internalize flag should be passed when linking
# * TARGET <string>
# Custom target to create
# * INPUT <string> ...
@@ -84,7 +86,7 @@ endfunction()
# List of extra dependencies to inject
function(link_bc)
cmake_parse_arguments(ARG
- ""
+ "INTERNALIZE"
"TARGET"
"INPUTS;DEPENDENCIES"
${ARGN}
@@ -97,7 +99,7 @@ function(link_bc)
file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE )
# Turn it into a space-separate list of input files
list( JOIN ARG_INPUTS " " RSP_INPUT )
- file( WRITE ${RSP_FILE} ${RSP_INPUT} )
+ file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} )
# Ensure that if this file is removed, we re-run CMake
set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
${RSP_FILE}
@@ -107,7 +109,7 @@ function(link_bc)
add_custom_command(
OUTPUT ${ARG_TARGET}.bc
- COMMAND ${llvm-link_exe} -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
+ COMMAND ${llvm-link_exe} $<$<BOOL:${ARG_INTERNALIZE}>:--internalize> -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
DEPENDS ${llvm-link_target} ${ARG_DEPENDENCIES} ${ARG_INPUTS} ${RSP_FILE}
)
@@ -195,6 +197,9 @@ endfunction()
# Triple used to compile
#
# Optional Arguments:
+# * CLC_INTERNAL
+# Pass if compiling the internal CLC builtin libraries, which are not
+# optimized and do not have aliases created.
# * LIB_FILES <string> ...
# List of files that should be built for this library
# * GEN_FILES <string> ...
@@ -205,11 +210,14 @@ endfunction()
# Optimization options (for opt)
# * ALIASES <string> ...
# List of aliases
+# * INTERNAL_LINK_DEPENDENCIES <string> ...
+# A list of extra bytecode files to link into the builtin library. Symbols
+# from these link dependencies will be internalized during linking.
function(add_libclc_builtin_set)
cmake_parse_arguments(ARG
- ""
+ "CLC_INTERNAL"
"ARCH;TRIPLE;ARCH_SUFFIX"
- "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES"
+ "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES;INTERNAL_LINK_DEPENDENCIES"
${ARGN}
)
@@ -258,12 +266,42 @@ function(add_libclc_builtin_set)
)
set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" )
+ if( NOT bytecode_files )
+ message(FATAL_ERROR "Cannot create an empty builtins library")
+ endif()
+
set( builtins_link_lib_tgt builtins.link.${ARG_ARCH_SUFFIX} )
- link_bc(
- TARGET ${builtins_link_lib_tgt}
- INPUTS ${bytecode_files}
- DEPENDENCIES ${builtins_comp_lib_tgt}
- )
+
+ if( NOT ARG_INTERNAL_LINK_DEPENDENCIES )
+ link_bc(
+ TARGET ${builtins_link_lib_tgt}
+ INPUTS ${bytecode_files}
+ DEPENDENCIES ${builtins_comp_lib_tgt}
+ )
+ else()
+ # If we have libraries to link while internalizing their symbols, we need
+ # two separate link steps; the --internalize flag applies to all link
+ # inputs but the first.
+ set( builtins_link_lib_tmp_tgt builtins.link.pre-deps.${ARG_ARCH_SUFFIX} )
+ link_bc(
+ TARGET ${builtins_link_lib_tmp_tgt}
+ INPUTS ${bytecode_files}
+ DEPENDENCIES ${builtins_comp_lib_tgt}
+ )
+ link_bc(
+ INTERNALIZE
+ TARGET ${builtins_link_lib_tgt}
+ INPUTS $<TARGET_PROPERTY:${builtins_link_lib_tmp_tgt},TARGET_FILE>
+ ${ARG_INTERNAL_LINK_DEPENDENCIES}
+ DEPENDENCIES ${builtins_link_lib_tmp_tgt}
+ )
+ endif()
+
+ # For the CLC internal builtins, exit here - we only optimize the targets'
+ # entry points once we've linked the CLC builtins into them
+ if( ARG_CLC_INTERNAL )
+ return()
+ endif()
set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> )
@@ -331,6 +369,9 @@ endfunction(add_libclc_builtin_set)
# LIB_FILE_LIST may be pre-populated and is appended to.
#
# Arguments:
+# * CLC_INTERNAL
+# Pass if compiling the internal CLC builtin libraries, which have a
+# different directory structure.
# * LIB_ROOT_DIR <string>
# Root directory containing target's lib files, relative to libclc root
# directory. If not provided, is set to '.'.
@@ -339,7 +380,7 @@ endfunction(add_libclc_builtin_set)
# files
function(libclc_configure_lib_source LIB_FILE_LIST)
cmake_parse_arguments(ARG
- ""
+ "CLC_INTERNAL"
"LIB_ROOT_DIR"
"DIRS"
${ARGN}
@@ -353,7 +394,11 @@ function(libclc_configure_lib_source LIB_FILE_LIST)
set( source_list )
foreach( l ${ARG_DIRS} )
foreach( s "SOURCES" "SOURCES_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}" )
- file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/lib/${s} file_loc )
+ if( ARG_CLC_INTERNAL )
+ file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/lib/${l}/${s} file_loc )
+ else()
+ file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/lib/${s} file_loc )
+ endif()
file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc )
# Prepend the location to give higher priority to
# specialized implementation
diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl
index e58bc26f4333a7..e790d02636563c 100644
--- a/libclc/generic/lib/geometric/dot.cl
+++ b/libclc/generic/lib/geometric/dot.cl
@@ -1,19 +1,20 @@
#include <clc/clc.h>
+#include <clc/geometric/clc_dot.h>
_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
- return p0*p1;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
- return p0.x*p1.x + p0.y*p1.y;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+ return __clc_dot(p0, p1);
}
#ifdef cl_khr_fp64
@@ -21,19 +22,19 @@ _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
- return p0*p1;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
- return p0.x*p1.x + p0.y*p1.y;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+ return __clc_dot(p0, p1);
}
#endif
@@ -42,20 +43,18 @@ _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) {
- return p0*p1;
-}
+_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); }
_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
- return p0.x*p1.x + p0.y*p1.y;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+ return __clc_dot(p0, p1);
}
#endif
More information about the cfe-commits
mailing list