[libclc] 28d9255 - [libclc] Override generic symbol using llvm-link --override flag instead of using weak linkage (#156778)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 5 04:58:10 PDT 2025
Author: Wenju He
Date: 2025-09-05T19:58:07+08:00
New Revision: 28d9255aa7c05738c7fd88711006d71d4dfc952a
URL: https://github.com/llvm/llvm-project/commit/28d9255aa7c05738c7fd88711006d71d4dfc952a
DIFF: https://github.com/llvm/llvm-project/commit/28d9255aa7c05738c7fd88711006d71d4dfc952a.diff
LOG: [libclc] Override generic symbol using llvm-link --override flag instead of using weak linkage (#156778)
Before this PR, weak linkage was applied to a few CLC generic functions
to allow target-specific implementations to override the generic ones.
However, adding weak linkage has the side effect of preventing
inter-procedural optimizations, such as PostOrderFunctionAttrsPass,
because a weak function doesn't have an exact definition (as determined
by hasExactDefinition in the pass).
This PR resolves the issue by adding the --override flag for every
non-generic bitcode file in the llvm-link run. This approach eliminates
the need for weak linkage while still allowing target-specific
implementations to override the generic ones.
llvm-diff shows improved attribute deduction for some functions in
amdgcn--amdhsa.bc, e.g.
%23 = tail call half @llvm.sqrt.f16(half %22)
=>
%23 = tail call noundef half @llvm.sqrt.f16(half %22)
Added:
Modified:
libclc/clc/lib/generic/math/clc_ldexp.cl
libclc/clc/lib/generic/math/clc_rsqrt.inc
libclc/clc/lib/generic/math/clc_sqrt.inc
libclc/cmake/modules/AddLibclc.cmake
Removed:
################################################################################
diff --git a/libclc/clc/lib/generic/math/clc_ldexp.cl b/libclc/clc/lib/generic/math/clc_ldexp.cl
index 8b41751e40282..f9252a75ab4bf 100644
--- a/libclc/clc/lib/generic/math/clc_ldexp.cl
+++ b/libclc/clc/lib/generic/math/clc_ldexp.cl
@@ -14,9 +14,7 @@
#include <clc/relational/clc_isnan.h>
#include <clc/shared/clc_clamp.h>
-#define _CLC_DEF_ldexp _CLC_DEF __attribute__((weak))
-
-_CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
+_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
if (!__clc_fp32_subnormals_supported()) {
// This treats subnormals as zeros
@@ -89,7 +87,7 @@ _CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
+_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
long l = __clc_as_ulong(x);
int e = (l >> 52) & 0x7ff;
long s = l & 0x8000000000000000;
@@ -124,14 +122,13 @@ _CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_OVERLOAD _CLC_DEF_ldexp half __clc_ldexp(half x, int n) {
+_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) {
return (half)__clc_ldexp((float)x, n);
}
#endif
#define __CLC_FUNCTION __clc_ldexp
-#define __CLC_DEF_SPEC _CLC_DEF_ldexp
#define __CLC_ARG2_TYPE int
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_rsqrt.inc b/libclc/clc/lib/generic/math/clc_rsqrt.inc
index 4c04155a932c7..07aad16f91916 100644
--- a/libclc/clc/lib/generic/math/clc_rsqrt.inc
+++ b/libclc/clc/lib/generic/math/clc_rsqrt.inc
@@ -6,8 +6,7 @@
//
//===----------------------------------------------------------------------===//
-__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
-__clc_rsqrt(__CLC_GENTYPE val) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rsqrt(__CLC_GENTYPE val) {
#pragma clang fp contract(fast)
return __CLC_FP_LIT(1.0) / __builtin_elementwise_sqrt(val);
}
diff --git a/libclc/clc/lib/generic/math/clc_sqrt.inc b/libclc/clc/lib/generic/math/clc_sqrt.inc
index 61e341993f5c8..e15dcf75ac3f2 100644
--- a/libclc/clc/lib/generic/math/clc_sqrt.inc
+++ b/libclc/clc/lib/generic/math/clc_sqrt.inc
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
-__clc_sqrt(__CLC_GENTYPE val) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
return __builtin_elementwise_sqrt(val);
}
diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake
index 5cc202ddbaa8c..aa8dd9859cd22 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -92,19 +92,35 @@ function(link_bc)
${ARGN}
)
- set( LINK_INPUT_ARG ${ARG_INPUTS} )
+ if( ARG_INTERNALIZE )
+ set( inputs_with_flag ${ARG_INPUTS} )
+ else()
+ # Add the --override flag for non-generic bitcode files so that their
+ # symbols can override definitions in generic bitcode files.
+ set( inputs_with_flag )
+ foreach( file IN LISTS ARG_INPUTS )
+ string( FIND ${file} "/generic/" is_generic )
+ if( is_generic LESS 0 )
+ list( APPEND inputs_with_flag "--override" )
+ endif()
+ list( APPEND inputs_with_flag ${file} )
+ endforeach()
+ endif()
+
if( WIN32 OR CYGWIN )
# Create a response file in case the number of inputs exceeds command-line
# character limits on certain platforms.
file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE )
# Turn it into a space-separate list of input files
- list( JOIN ARG_INPUTS " " RSP_INPUT )
+ list( JOIN inputs_with_flag " " RSP_INPUT )
file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} )
# Ensure that if this file is removed, we re-run CMake
set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
${RSP_FILE}
)
set( LINK_INPUT_ARG "@${RSP_FILE}" )
+ else()
+ set( LINK_INPUT_ARG ${inputs_with_flag} )
endif()
if( ARG_INTERNALIZE )
More information about the cfe-commits
mailing list